Skip to content

Commit

Permalink
llama.cpp summarization working
Browse files Browse the repository at this point in the history
Llama.cpp summarization works.
  • Loading branch information
rmusser01 committed May 7, 2024
1 parent d4a15f2 commit 1ff1be9
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 52 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,21 @@ By default videos, transcriptions and summaries are stored in a folder with the
* FIXME
8. For feeding the transcriptions to the API of your choice, simply use the corresponding script for your API provider.
* FIXME: add scripts for OpenAI api (generic) and others
- **Setting up Local LLM Runner**
- **Llama.cpp**
- **Linux & Mac**
1. `git clone https://github.com/ggerganov/llama.cpp`
2. `make` in the `llama.cpp` folder
3. `./server -m ../path/to/model -c <context_size>`
- **Windows**
1. `git clone https://github.com/ggerganov/llama.cpp` (the server sources live in `examples/server`)
2. Download and run w64devkit: https://github.com/skeeto/w64devkit/releases
3. From the w64devkit shell, `cd` to the `llama.cpp` folder and run `make`
4. `server.exe -m ..\path\to\model -c <context_size>`
- **Kobold.cpp**
- **ExLlamaV2**



### Usage
- Single file (remote URL) transcription
Expand Down
2 changes: 1 addition & 1 deletion config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ openai_model = gpt-4-turbo
cohere_api_key = <your_cohere_api_key>
cohere_model = command-r-plus
llama_api_key = <llama.cpp api key>
llama_api_IP = <IP:port of llama.cpp server>
llama_api_IP = http://127.0.0.1:8080/completion

[Paths]
output_path = Results
Expand Down
92 changes: 41 additions & 51 deletions diarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@
cohere_api_key = config.get('API', 'cohere_api_key', fallback=None)
anthropic_api_key = config.get('API', 'anthropic_api_key', fallback=None)
openai_api_key = config.get('API', 'openai_api_key', fallback=None)
llama_api_key = config.get('API', 'llama_api_key', fallback=None)

# Models
anthropic_model = config.get('API', 'anthropic_model', fallback='claude-3-sonnet-20240229')
cohere_model = config.get('API', 'cohere_model', fallback='command-r-plus')
openai_model = config.get('API', 'openai_model', fallback='gpt-4-turbo')

# Local-Models
llama_ip = config.get('API', 'llama_api_IP', fallback='127.0.0.1:8080/v1/chat/completions')
llama_api_IP = config.get('API', 'llama_api_IP', fallback='127.0.0.1:8080/v1/chat/completions')
llama_api_key = config.get('API', 'llama_api_key', fallback='')

# Retrieve output paths from the configuration file
output_path = config.get('Paths', 'output_path', fallback='Results')
Expand Down Expand Up @@ -678,7 +678,7 @@ def summarize_with_claude(api_key, file_path, model):
with open(file_path, 'r') as file:
segments = json.load(file)

logging.debug("anthropic: Extracting text from the segments")
logging.debug("anthropic: Extracting text from the segments file")
text = extract_text_from_segments(segments)

headers = {
Expand Down Expand Up @@ -748,7 +748,7 @@ def summarize_with_cohere(api_key, file_path, model):
with open(file_path, 'r') as file:
segments = json.load(file)

logging.debug(f"cohere: Extracting text from {segments}")
logging.debug(f"cohere: Extracting text from segments file")
text = extract_text_from_segments(segments) # Make sure this function is defined

headers = {
Expand Down Expand Up @@ -792,64 +792,50 @@ def summarize_with_cohere(api_key, file_path, model):
return f"cohere: Error occurred while processing summary with Cohere: {str(e)}"


"""

if response.status_code == 200:
json_response = response.json()
if 'response' in json_response:
summary = json_response['response'].strip()
print("Summary processed successfully.")
return summary
else:
logging.debug("Unexpected JSON structure: %s", json_response)
return "Error: Unexpected JSON structure."
else:
logging.debug("cohere: Unsuccessful request :(")
print("Failed to process summary:", response.text)
return None
"""


def summarize_with_llama(api_url, file_path):
def summarize_with_llama(api_url, file_path, token):
try:
logging.debug("llama: Loading JSON data")
with open(file_path, 'r') as file:
segments = json.load(file)

logging.debug("llama: Extracting text from segments")
text = extract_text_from_segments(segments)

logging.debug("llama: Preparing data + prompt for submittal")
logging.debug(f"llama: Extracting text from segments file")
text = extract_text_from_segments(segments) # Define this function to extract text properly

headers = {
'accept': 'application/json',
'content-type': 'application/json',
}
if len(token)>5:
headers['Authorization'] = f'Bearer {token}'


prompt_text = f"{text} \n\nAs a professional summarizer, create a concise and comprehensive summary of the provided text."
data = {
"prompt": f"{text} \n\n\n\nPlease provide a detailed, bulleted list of the points made throughout the transcribed video and any supporting arguments made for said points",
"max_tokens": 4096,
"stop": ["\n\nHuman:"],
"temperature": 0.7,
"top_k": 0,
"top_p": 1.0,
"repeat_penalty": 1.0,
"repeat_last_n": 64,
"seed": -1,
"threads": 4,
"n_predict": 4096
"prompt": prompt_text
}

logging.debug("llama: POSTing data to API endpoint")
response = requests.post(api_url, json=data)
logging.debug("llama: Submitting request to API endpoint")
response = requests.post(api_url, headers=headers, json=data)
response_data = response.json()
logging.debug("API Response Data: %s", response_data)

if response.status_code == 200:
logging.debug("llama: POST Successful")
summary = response.json()['content'].strip()
print("Summary processed successfully.")
return summary
if 'summary' in response_data:
summary = response_data['summary'].strip()
logging.debug("llama: Summarization successful")
return summary
else:
logging.error("Expected 'summary' key not found in API response.")
return "Expected 'summary' key not found in API response."
else:
logging.debug("llama: Unsuccessful POST")
print("Failed to process summary:", response.text)
return None
logging.error(f"llama: API request failed with status code {response.status_code}: {response.text}")
return f"llama: API request failed: {response.text}"

except Exception as e:
logging.debug("llama: Generalized error, see above")
print("Error occurred while processing summary with llama.cpp:", str(e))
return None
logging.error("llama: Error in processing: %s", str(e))
return f"llama: Error occurred while processing summary with llama: {str(e)}"



def save_summary_to_file(summary, file_path):
Expand All @@ -871,8 +857,8 @@ def save_summary_to_file(summary, file_path):
####################################################################################################################################
# Main()
#

def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False):
start_time = time.monotonic()
if os.path.isfile(input_path) and input_path.endswith('.txt'):
logging.debug("MAIN: User passed in a text file, processing text file...")
paths = read_paths_from_file(input_path)
Expand Down Expand Up @@ -928,7 +914,9 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
api_key = cohere_api_key
summary = summarize_with_cohere(api_key, json_file_path, cohere_model)
elif api_name.lower() == 'llama':
summary = summarize_with_llmaa(llama_ip, json_file_path)
token = llama_api_key
llama_ip = llama_api_IP
summary = summarize_with_llama(llama_ip, json_file_path, token)
else:
logging.warning(f"Unsupported API: {api_name}")
summary = None
Expand All @@ -943,6 +931,8 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
except Exception as e:
logging.error(f"Error processing path: {path}")
logging.error(str(e))
end_time = time.monotonic()
print("Total program execution time: " + timedelta(seconds=end_time - start_time))

return results

Expand Down

0 comments on commit 1ff1be9

Please sign in to comment.