Christopher Capobianco
committed on
Commit
·
27cd57b
1
Parent(s):
963c420
Enable llama-cpp-python to load fine-tuned and quantized LLM
Browse files- projects/07_LLM_Fine_Tuned.py +24 -24
projects/07_LLM_Fine_Tuned.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
|
| 3 |
import re
|
| 4 |
|
| 5 |
st.header('Fine Tuned LLM', divider='green')
|
|
@@ -29,36 +29,36 @@ def chat_action(prompt):
|
|
| 29 |
st.session_state["messages"].append({"role": "user", "content": prompt})
|
| 30 |
st.chat_message("user").write(prompt)
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
|
| 55 |
for msg in st.session_state.messages:
|
| 56 |
if msg["role"] != "system":
|
| 57 |
with st.chat_message(name=msg["role"]):
|
| 58 |
st.write(msg["content"])
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
|
| 63 |
if prompt := st.chat_input():
|
| 64 |
chat_action(prompt)
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from llama_cpp import Llama
|
| 3 |
import re
|
| 4 |
|
| 5 |
st.header('Fine Tuned LLM', divider='green')
|
|
|
|
| 29 |
st.session_state["messages"].append({"role": "user", "content": prompt})
|
| 30 |
st.chat_message("user").write(prompt)
|
| 31 |
|
| 32 |
+
with st.spinner(f"Generating response"):
|
| 33 |
+
response = llm.create_chat_completion(
|
| 34 |
+
messages=st.session_state.messages,
|
| 35 |
+
temperature = 0.7,
|
| 36 |
+
repeat_penalty = 1.1,
|
| 37 |
+
stop = "[/INST]"
|
| 38 |
+
)
|
| 39 |
+
msg = response['choices'][0]['message']['content']
|
| 40 |
+
msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
|
| 41 |
+
st.session_state["messages"].append({"role": "assistant", "content": msg})
|
| 42 |
+
st.chat_message("assistant").write(msg)
|
| 43 |
|
| 44 |
+
@st.cache_resource
|
| 45 |
+
def load_llm():
|
| 46 |
+
#### Import Model from Huggingface
|
| 47 |
+
llm = Llama.from_pretrained(
|
| 48 |
+
repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
|
| 49 |
+
filename="*Q4_K_M.gguf",
|
| 50 |
+
verbose=False,
|
| 51 |
+
n_ctx=2048
|
| 52 |
+
)
|
| 53 |
+
return llm
|
| 54 |
|
| 55 |
for msg in st.session_state.messages:
|
| 56 |
if msg["role"] != "system":
|
| 57 |
with st.chat_message(name=msg["role"]):
|
| 58 |
st.write(msg["content"])
|
| 59 |
|
| 60 |
+
with st.spinner(f"Loading LLM"):
|
| 61 |
+
llm = load_llm()
|
| 62 |
|
| 63 |
if prompt := st.chat_input():
|
| 64 |
chat_action(prompt)
|