From 65f3cb6561f7bde99e6fa39489452570d810656f Mon Sep 17 00:00:00 2001 From: Christian Weilbach Date: Fri, 12 Apr 2024 06:58:32 +0000 Subject: [PATCH] Factor text-extraction runtime. --- .../simm/runtimes/relational_assistance.clj | 60 +--------- src/ie/simm/runtimes/text_extractor.clj | 103 ++++++++++++++++++ src/ie/simm/towers.clj | 7 +- 3 files changed, 111 insertions(+), 59 deletions(-) create mode 100644 src/ie/simm/runtimes/text_extractor.clj diff --git a/src/ie/simm/runtimes/relational_assistance.clj b/src/ie/simm/runtimes/relational_assistance.clj index c14ae03..0ce5754 100644 --- a/src/ie/simm/runtimes/relational_assistance.clj +++ b/src/ie/simm/runtimes/relational_assistance.clj @@ -34,7 +34,7 @@ conn) (catch Exception _ (d/connect cfg)))] - (d/transact conn default-schema) + #_(d/transact conn default-schema) (swap! peer assoc-in [:conn chat-id] conn) conn))) @@ -101,26 +101,6 @@ (when (seq tags) {:message/tag tags}))])))) - -;; TODO factor into youtube middleware -;; require libpython - -(require '[libpython-clj2.require :refer [require-python]] - '[libpython-clj2.python :refer [py. py.. py.-] :as py]) - -(require-python '[youtube_transcript_api :refer [YouTubeTranscriptApi]]) - -(defn youtube-transcript [video-id] - ;; " ".join([t['text'] for t in transcript]) - (let [transcript (py. YouTubeTranscriptApi get_transcript video-id)] - (str/join " " (map :text transcript)))) - -(comment - (youtube-transcript "20TAkcy3aBY") - - ) - - (def window-size 10) (defn summarize [S conn conv chat] @@ -195,33 +175,6 @@ (.close zip-out) zip-file)) -(defn extract-url [S text chat] - (go-try S - (if-let [;; if text matches http or https web URL extrect URL with regex - url (re-find #"https?://\S+" text)] - (if-let [;; extract youtube video id from URL - youtube-id (second (or (re-find #"youtube.com/watch\?v=([^&]+)" url) - (re-find #"youtu.be/([^\?]+)" url)))] - (try - (debug "summarizing youtube transcript" youtube-id) - (let [transcript (youtube-transcript youtube-id) - summary (? put? go-for] :as sasync] + [clojure.core.async :refer [chan pub sub mult tap timeout] :as async] + [taoensso.timbre :refer [debug info warn error]] + [hasch.core :refer [uuid]] + [clojure.string :as str] + [libpython-clj2.require :refer [require-python]] + [libpython-clj2.python :refer [py. py.. py.-] :as py])) + +(require-python '[youtube_transcript_api :refer [YouTubeTranscriptApi]]) + +(defn youtube-transcript [video-id] + ;; " ".join([t['text'] for t in transcript]) + (let [transcript (py. YouTubeTranscriptApi get_transcript video-id)] + (str/join " " (map :text transcript)))) + +(comment + (youtube-transcript "20TAkcy3aBY") + + ) + +(defn extract-url [S text chat] + (go-try S + (if-let [;; if text matches http or https web URL extrect URL with regex + url (re-find #"https?://\S+" text)] + (if-let [;; extract youtube video id from URL + youtube-id (second (or (re-find #"youtube.com/watch\?v=([^&]+)" url) + (re-find #"youtu.be/([^\?]+)" url)))] + (try + (debug "summarizing youtube transcript" youtube-id) + (let [transcript (youtube-transcript youtube-id) + summary (? S next-in m)) + (catch Exception e + (let [error-id (uuid)] + (error "Could not process message(" error-id "): " m e) + (