From 55fa69546c2ffbda51e4195b3d9d92bee1a68a4c Mon Sep 17 00:00:00 2001
From: Sanggyu Lee
Date: Thu, 17 Jul 2025 10:34:45 +0900
Subject: [PATCH 1/3] [test] Add LlamaDecoderLayer using captured input

It adds a LlamaDecoderLayer test, which uses captured input.

TICO-DCO-1.0-Signed-off-by: Sanggyu Lee
---
 .../model/LlamaDecoderLayerWithCache/model.py | 81 +++++++++++++++++++
 .../requirements.txt                          |  1 +
 2 files changed, 82 insertions(+)
 create mode 100644 test/modules/model/LlamaDecoderLayerWithCache/model.py
 create mode 100644 test/modules/model/LlamaDecoderLayerWithCache/requirements.txt

diff --git a/test/modules/model/LlamaDecoderLayerWithCache/model.py b/test/modules/model/LlamaDecoderLayerWithCache/model.py
new file mode 100644
index 00000000..e6a4d1d1
--- /dev/null
+++ b/test/modules/model/LlamaDecoderLayerWithCache/model.py
@@ -0,0 +1,81 @@
+# User input
+prompt = "Lily picked up a flower."
+model_name = "Maykeye/TinyLLama-v0"
+
+captured_input = None  # type: ignore[var-annotated]
+
+import copy, inspect, types
+
+from transformers.models.llama.modeling_llama import LlamaDecoderLayer
+
+forward_old = LlamaDecoderLayer.forward
+
+
+def capture_and_forward(self, *args, **kwargs):
+    global captured_input
+
+    # Prepare args tuple for TICO.convert()
+    # Get arg_names in positional args order using inspect
+    sig = inspect.signature(forward_old)
+    args_names = [
+        # signature includes `self` and `kwargs`.
+        # Just retrieve the ordinary positional inputs only
+        name for name in sig.parameters.keys() if name not in ("self", "kwargs")
+    ]
+
+    args_dict = dict(zip(args_names, args))
+    args_dict.update(kwargs)
+
+    def populate_args(args_dict, filter):
+        for key in filter:
+            args_dict.pop(key, None)
+        args_tuple = tuple(args_dict.get(name, None) for name in args_names)
+        return copy.deepcopy(args_tuple)
+
+    if len(args_dict['past_key_value'].key_cache) != 0:
+        input_to_remove = [ "use_cache" ]
+        captured_input = populate_args(args_dict, input_to_remove)
+
+    return forward_old(self, *args, **kwargs)
+
+
+# Tokenizer
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
+inputs = tokenizer(
+    prompt,
+    return_tensors="pt",
+    padding="max_length",
+    max_length=32,
+    truncation=True,
+)
+
+
+# Generator
+import torch
+
+from transformers import AutoModelForCausalLM
+
+model = AutoModelForCausalLM.from_pretrained(model_name)
+model.eval()
+model.model.layers[0].forward = types.MethodType(capture_and_forward, model.model.layers[0])
+with torch.no_grad():
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=32,
+        do_sample=False,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(generated_text)
+
+# Tico
+import tico
+
+model = AutoModelForCausalLM.from_pretrained(model_name)
+model.eval()
+circle_model = tico.convert(model.model.layers[0], captured_input)
+circle_model.save(f"llama.decoderlayer.circle")
diff --git a/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt b/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt
new file mode 100644
index 00000000..5393938f
--- /dev/null
+++ b/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt
@@ -0,0 +1 @@
+transformers>=4.50.1

From 5bc2ce63cc6849bc3aa47f7828e6248f33c6ca9b Mon Sep 17 00:00:00 2001
From: Sanggyu Lee
Date: Thu, 17 Jul 2025 11:48:20 +0900
Subject: [PATCH 2/3] Make format happy

---
 .../model/LlamaDecoderLayerWithCache/model.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/modules/model/LlamaDecoderLayerWithCache/model.py b/test/modules/model/LlamaDecoderLayerWithCache/model.py
index e6a4d1d1..a33fbe82 100644
--- a/test/modules/model/LlamaDecoderLayerWithCache/model.py
+++ b/test/modules/model/LlamaDecoderLayerWithCache/model.py
@@ -2,7 +2,7 @@
 prompt = "Lily picked up a flower."
 model_name = "Maykeye/TinyLLama-v0"
 
-captured_input = None  # type: ignore[var-annotated]
+captured_input = ()
 
 import copy, inspect, types
 
@@ -20,7 +20,9 @@ def capture_and_forward(self, *args, **kwargs):
     args_names = [
         # signature includes `self` and `kwargs`.
         # Just retrieve the ordinary positional inputs only
-        name for name in sig.parameters.keys() if name not in ("self", "kwargs")
+        name
+        for name in sig.parameters.keys()
+        if name not in ("self", "kwargs")
     ]
 
     args_dict = dict(zip(args_names, args))
@@ -32,8 +34,8 @@ def populate_args(args_dict, filter):
         args_tuple = tuple(args_dict.get(name, None) for name in args_names)
         return copy.deepcopy(args_tuple)
 
-    if len(args_dict['past_key_value'].key_cache) != 0:
-        input_to_remove = [ "use_cache" ]
+    if len(args_dict["past_key_value"].key_cache) != 0:
+        input_to_remove = ["use_cache"]
         captured_input = populate_args(args_dict, input_to_remove)
 
     return forward_old(self, *args, **kwargs)
@@ -61,7 +63,9 @@ def populate_args(args_dict, filter):
 
 model = AutoModelForCausalLM.from_pretrained(model_name)
 model.eval()
-model.model.layers[0].forward = types.MethodType(capture_and_forward, model.model.layers[0])
+model.model.layers[0].forward = types.MethodType(
+    capture_and_forward, model.model.layers[0]
+)
 with torch.no_grad():
     outputs = model.generate(
         **inputs,

From fe498e299605edbed85ac354ec15ccd28fb8b8ef Mon Sep 17 00:00:00 2001
From: Sanggyu Lee
Date: Thu, 17 Jul 2025 14:55:21 +0900
Subject: [PATCH 3/3] Rename
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- forward_old → forward_org
- output filename: llama → tinyllama
- LlamaDecoderLayerWithCache → LlamaDecoderLayerWithKVCache
---
 .../model/LlamaDecoderLayerWithCache/model.py | 85 -------------------
 .../requirements.txt                          |  1 -
 2 files changed, 86 deletions(-)
 delete mode 100644 test/modules/model/LlamaDecoderLayerWithCache/model.py
 delete mode 100644 test/modules/model/LlamaDecoderLayerWithCache/requirements.txt

diff --git a/test/modules/model/LlamaDecoderLayerWithCache/model.py b/test/modules/model/LlamaDecoderLayerWithCache/model.py
deleted file mode 100644
index a33fbe82..00000000
--- a/test/modules/model/LlamaDecoderLayerWithCache/model.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# User input
-prompt = "Lily picked up a flower."
-model_name = "Maykeye/TinyLLama-v0"
-
-captured_input = ()
-
-import copy, inspect, types
-
-from transformers.models.llama.modeling_llama import LlamaDecoderLayer
-
-forward_old = LlamaDecoderLayer.forward
-
-
-def capture_and_forward(self, *args, **kwargs):
-    global captured_input
-
-    # Prepare args tuple for TICO.convert()
-    # Get arg_names in positional args order using inspect
-    sig = inspect.signature(forward_old)
-    args_names = [
-        # signature includes `self` and `kwargs`.
-        # Just retrieve the ordinary positional inputs only
-        name
-        for name in sig.parameters.keys()
-        if name not in ("self", "kwargs")
-    ]
-
-    args_dict = dict(zip(args_names, args))
-    args_dict.update(kwargs)
-
-    def populate_args(args_dict, filter):
-        for key in filter:
-            args_dict.pop(key, None)
-        args_tuple = tuple(args_dict.get(name, None) for name in args_names)
-        return copy.deepcopy(args_tuple)
-
-    if len(args_dict["past_key_value"].key_cache) != 0:
-        input_to_remove = ["use_cache"]
-        captured_input = populate_args(args_dict, input_to_remove)
-
-    return forward_old(self, *args, **kwargs)
-
-
-# Tokenizer
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = "right"
-inputs = tokenizer(
-    prompt,
-    return_tensors="pt",
-    padding="max_length",
-    max_length=32,
-    truncation=True,
-)
-
-
-# Generator
-import torch
-
-from transformers import AutoModelForCausalLM
-
-model = AutoModelForCausalLM.from_pretrained(model_name)
-model.eval()
-model.model.layers[0].forward = types.MethodType(
-    capture_and_forward, model.model.layers[0]
-)
-with torch.no_grad():
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=32,
-        do_sample=False,
-        pad_token_id=tokenizer.eos_token_id,
-    )
-generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(generated_text)
-
-# Tico
-import tico
-
-model = AutoModelForCausalLM.from_pretrained(model_name)
-model.eval()
-circle_model = tico.convert(model.model.layers[0], captured_input)
-circle_model.save(f"llama.decoderlayer.circle")
diff --git a/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt b/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt
deleted file mode 100644
index 5393938f..00000000
--- a/test/modules/model/LlamaDecoderLayerWithCache/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-transformers>=4.50.1