alibaba · haiasd · Oct 15, 2025 · Oct 11, 2025 · Oct 15, 2025 · Oct 15, 2025
diff --git a/chatlearn/algorithm/grpo_utils/megatron_utils/train_helper.py b/chatlearn/algorithm/grpo_utils/megatron_utils/train_helper.py
@@ -393,7 +393,6 @@ def get_batch(
             {
                 "pixel_values": data_b['pixel_values'], # [token_length, token_num]
                 "image_grid_thw": data_b['image_grid_thw'], # [batch_size, 3]
-                "rope_deltas": data_b['rope_deltas'], # [batch_size, 1]
                 "image_input_mask": tokens==get_tokenizer().vocab['<|image_pad|>'] # [batch_size, token_length]
             }
         )

diff --git a/chatlearn/algorithm/grpo_utils/policy_trainer.py b/chatlearn/algorithm/grpo_utils/policy_trainer.py
@@ -103,7 +103,6 @@ def preprocess_data_list(self, data_list: List[Dict[str, Any]], training: bool):
 
             # for vl
             position_ids = data_b.get("position_ids", None)
-            rope_deltas = data_b.get("rope_deltas", None)
             pixel_values = data_b.get("pixel_values", None)
             image_grid_thw = data_b.get("image_grid_thw", None)
 
@@ -151,7 +150,6 @@ def preprocess_data_list(self, data_list: List[Dict[str, Any]], training: bool):
                     {
                         "pixel_values": pixel_values, # [token_length, token_num]
                         "image_grid_thw": image_grid_thw, # [batch_size, 3]
-                        "rope_deltas": rope_deltas # [batch_size, 1]
                     }
                 )
 
@@ -196,8 +194,7 @@ def train_step(self, data_list: List[Dict[str, Any]], **kwargs): # pylint: disab
                     image_grid_thw=inputs['image_grid_thw'],
                     attention_mask=None,
                     position_ids=inputs['position_ids'],
-                    use_cache=False,
-                    rope_deltas=inputs['rope_deltas']
+                    use_cache=False
                 )
             else:
                 output = self.model(
@@ -315,8 +312,7 @@ def forward_step(self, data: List[Dict[str, Any]], **kwargs) -> List[Dict[str, A
                         image_grid_thw=inputs['image_grid_thw'],
                         attention_mask=None,
                         position_ids=inputs['position_ids'],
-                        use_cache=False,
-                        rope_deltas=inputs['rope_deltas']
+                        use_cache=False
                     )
                 else:
                     output = self.model(

diff --git a/chatlearn/algorithm/grpo_utils/trainer_utils.py b/chatlearn/algorithm/grpo_utils/trainer_utils.py
@@ -139,7 +139,7 @@ def batching(data_list: List[Dict[str, Any]]) -> Dict[str, Any]:
     batched_data = defaultdict(list)
     for key in data_list[0]:
         batched_data[key] = [data[key] for data in data_list]
-        if key in ['pixel_values', 'image_grid_thw', 'rope_deltas']:
+        if key in ['pixel_values', 'image_grid_thw']:
             batched_data[key] = torch.cat(batched_data[key], dim=0)
         elif isinstance(batched_data[key][0], torch.Tensor):
             batched_data[key] = padding_tensor(batched_data[key])

diff --git a/chatlearn/data/data_preprocess/geo3k.py b/chatlearn/data/data_preprocess/geo3k.py
@@ -3,9 +3,34 @@
 """
 
 import argparse
+import base64
 import os
+from typing import List, Dict
+from io import BytesIO
 
 import datasets
+from PIL import Image
+
+def image_to_base64(img: Image.Image) -> str:
+
+    img = img.convert("RGB")
+    buffered = BytesIO()
+    img.save(buffered, format="JPEG", quality=100)
+    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    return img_str
+
+def prepare_image_content(img_list: List[Image.Image]) -> List[Dict]:
+    img_content = []
+
+    for img in img_list:
+        img_content.append(
+            {
+                "type": "image_url",
+                "image_url": f"data:image;base64,{image_to_base64(img)}"
+            }
+        )
+        assert img_content[-1]["image_url"] is not None
+    return img_content
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -32,18 +57,24 @@ def make_map_fn(split):
         def process_fn(example, idx):
             problem = example.pop("problem")
             prompt = problem + " " + instruction_following
+            prompt = prompt.replace("<image>", "")
             answer = example.pop("answer")
             images = example.pop("images")
+            # format openai style messages
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt}
+                    ]
+                }
+            ]
+            messages[0]["content"] = prepare_image_content(images)+ \
+                messages[0]["content"]
 
             data = {
                 "data_source": data_source,
-                "prompt": [
-                    {
-                        "role": "user",
-                        "content": prompt,
-                    }
-                ],
-                "images": images,
+                "messages": messages,
                 "ability": "math",
                 "reward_model": {"style": "rule", "ground_truth": answer},
                 "extra_info": {
@@ -63,5 +94,6 @@ def process_fn(example, idx):
     local_dir = args.local_dir
     hdfs_dir = args.hdfs_dir
 
+    # to_parquet may produce key: None in dict
     train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
     test_dataset.to_parquet(os.path.join(local_dir, "test.parquet"))
diff --git a/chatlearn/data/data_preprocess/geo3k_agent.py b/chatlearn/data/data_preprocess/geo3k_agent.py
@@ -0,0 +1,109 @@
+"""
+Preprocess the Geometry3k dataset to parquet format
+"""
+
+import argparse
+import base64
+import os
+from typing import List, Dict
+from io import BytesIO
+
+import datasets
+from PIL import Image
+
+def image_to_base64(img: Image.Image) -> str:
+
+    img = img.convert("RGB")
+    buffered = BytesIO()
+    img.save(buffered, format="JPEG", quality=100)
+    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    return img_str
+
+def prepare_image_content(img_list: List[Image.Image]) -> List[Dict]:
+    img_content = []
+
+    for img in img_list:
+        img_content.append(
+            {
+                "type": "image_url",
+                "image_url": f"data:image;base64,{image_to_base64(img)}"
+            }
+        )
+        assert img_content[-1]["image_url"] is not None
+    return img_content
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--local_dir", default="dataset/geo3k")
+    parser.add_argument("--hdfs_dir", default=None)
+
+    args = parser.parse_args()
+
+    data_source = "hiyouga/geometry3k"
+
+    dataset = datasets.load_dataset(data_source)
+
+    train_dataset = dataset["train"]
+    test_dataset = dataset["test"]
+
+    instruction_following = (
+        r"You FIRST think about the reasoning process as an internal monologue and then provide the final answer. "
+        r"The reasoning process MUST BE enclosed within <think> </think> tags. "
+        r"You must use the `calc_geo3k_reward` tool after step by step solving the question"
+        r"The final answer MUST BE put in \boxed{}."
+    )
+
+    system_prompt = (
+                    "You are a math expert. You are given a question and you need to solve it step by step. "
+                    "Reasoning step by step before any tool call. "
+                    "You should use the `calc_geo3k_reward` tool after step by step solving the question, "
+                    "before generate final answer at least once and refine your answer if necessary. "
+                    "Put your final answer within \\boxed{}."
+                )
+    # add a row to each data item that represents a unique id
+    def make_map_fn(split):
+        def process_fn(example, idx):
+            problem = example.pop("problem")
+            prompt = problem + " " + instruction_following
+            prompt = prompt.replace("<image>", "")
+            answer = example.pop("answer")
+            images = example.pop("images")
+            # format openai style messages
+            messages = [
+                {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt}
+                    ]
+                }
+            ]
+            messages[-1]["content"] = prepare_image_content(images)+ \
+                messages[-1]["content"]
+            data = {
+                "agent_name": "geo3k_agent",
+                "agent_cfg_path": "template/agent/geo3k_eval.yaml",
+                "data_source": data_source,
+                "messages": messages,
+                "ability": "math",
+                "reward_model": {"style": "rule", "ground_truth": answer},
+                "extra_info": {
+                    "split": split,
+                    "index": idx,
+                    "answer": answer,
+                    "question": problem,
+                },
+            }
+            return data
+
+        return process_fn
+
+    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True, num_proc=8)
+    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True, num_proc=8)
+
+    local_dir = args.local_dir
+    hdfs_dir = args.hdfs_dir
+
+    # to_parquet may produce key: None in dict
+    train_dataset.to_parquet(os.path.join(local_dir, "train_agent.parquet"))
+    test_dataset.to_parquet(os.path.join(local_dir, "test_agent.parquet"))