Add padding="longest"

jedick committed
Commit: f42e9e5
Parent: 09d7140

Files changed:
- main.py (+3 -0)
- pipeline.py (+0 -1)
main.py
CHANGED
@@ -164,6 +164,9 @@ def GetChatModel(compute_mode, ckpt_dir=None):
         return_full_text=False,
         # It seems that max_new_tokens has to be specified here, not in .invoke()
         max_new_tokens=2000,
+        # Use padding for FlashAttention alignment
+        # https://github.com/google-deepmind/gemma/issues/169
+        padding="longest",
     )
     # We need the task so HuggingFacePipeline can deal with our class
     pipe.task = "text-generation"
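For context, a minimal sketch of how a pipeline configured this way might be built and wrapped for LangChain, per the comment about HuggingFacePipeline. The model checkpoint and the exact MyTextGenerationPipeline constructor arguments are assumptions for illustration; the commit does not show them.

from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_huggingface import HuggingFacePipeline

from pipeline import MyTextGenerationPipeline

# Hypothetical checkpoint; the actual model is not shown in this commit.
model_id = "google/gemma-2-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

pipe = MyTextGenerationPipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    # It seems that max_new_tokens has to be specified here, not in .invoke()
    max_new_tokens=2000,
    # Use padding for FlashAttention alignment
    # https://github.com/google-deepmind/gemma/issues/169
    padding="longest",
)
# We need the task so HuggingFacePipeline can deal with our class
pipe.task = "text-generation"
llm = HuggingFacePipeline(pipeline=pipe)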
pipeline.py
CHANGED
@@ -22,7 +22,6 @@ class MyTextGenerationPipeline(TextGenerationPipeline):
         continue_final_message=None,
         **generate_kwargs,
     ):
-        print(f"PADDING: {padding}")
         # Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
         tokenizer_kwargs = {
             "add_special_tokens": add_special_tokens,
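The removed print was a leftover debug statement; the surviving comment describes the actual pattern: collect the tokenizer kwargs, then drop any that are None so the tokenizer's own defaults apply. A standalone sketch of that filtering step (the helper name and the exact set of kwargs are illustrative, not taken from pipeline.py):

def build_tokenizer_kwargs(add_special_tokens=None, padding=None, truncation=None):
    """Collect tokenizer kwargs, keeping only those explicitly set."""
    tokenizer_kwargs = {
        "add_special_tokens": add_special_tokens,
        "padding": padding,
        "truncation": truncation,
    }
    # Only set non-None tokenizer kwargs, so as to rely on the tokenizer's defaults
    return {key: value for key, value in tokenizer_kwargs.items() if value is not None}

# With padding="longest" set upstream, only that key is forwarded:
print(build_tokenizer_kwargs(padding="longest"))  # {'padding': 'longest'}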