diff --git a/README.md b/README.md
index dd2c5b1eedc..0c06b73f05b 100644
--- a/README.md
+++ b/README.md
@@ -96,10 +96,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: Easily preview, copy and share generated content/webpages through a separate window [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
 - [x] Plugins: support network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
   - [x] network search, calculator, any other apis etc. [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+- [x] Supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - [ ] local knowledge base
 
 ## What's New
-
+- 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 Now supports  Artifacts & SD 
@@ -134,10 +135,11 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - [x] Artifacts: 通过独立窗口,轻松预览、复制和分享生成的内容/可交互网页 [#5092](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/pull/5092)
 - [x] 插件机制,支持`联网搜索`、`计算器`、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
    - [x] 支持联网搜索、计算器、调用其他平台 api [#165](https://github.com/Yidadaa/ChatGPT-Next-Web/issues/165) [#5353](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5353)
+ - [x] 支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
  - [ ] 本地知识库
 
 ## 最新动态
-
+- 🚀 v2.15.8 现在支持 Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 客户端支持Tauri本地直接调用大模型API,更安全![#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 现在支持插件功能了!了解更多:[NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
 - 🚀 v2.14.0 现在支持 Artifacts & SD 了。
@@ -301,6 +303,14 @@ iflytek Api Key.
 
 iflytek Api Secret.
 
+### `CHATGLM_API_KEY` (optional)
+
+ChatGLM Api Key.
+
+### `CHATGLM_URL` (optional)
+
+ChatGLM Api Url.
+
 ### `HIDE_USER_API_KEY` (optional)
 
 > Default: Empty
diff --git a/README_CN.md b/README_CN.md
index ccdcf28ffe9..d4da8b9da13 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -184,6 +184,13 @@ ByteDance Api Url.
 
 讯飞星火Api Secret.
 
+### `CHATGLM_API_KEY` (可选)
+
+ChatGLM Api Key.
+
+### `CHATGLM_URL` (可选)
+
+ChatGLM Api Url.
 
 
 ### `HIDE_USER_API_KEY` (可选)
diff --git a/app/api/common.ts b/app/api/common.ts
index b4c792d6ff0..495a12ccdbb 100644
--- a/app/api/common.ts
+++ b/app/api/common.ts
@@ -1,8 +1,8 @@
 import { NextRequest, NextResponse } from "next/server";
 import { getServerSideConfig } from "../config/server";
 import { OPENAI_BASE_URL, ServiceProvider } from "../constant";
-import { isModelAvailableInServer } from "../utils/model";
 import { cloudflareAIGatewayUrl } from "../utils/cloudflare";
+import { getModelProvider, isModelAvailableInServer } from "../utils/model";
 
 const serverConfig = getServerSideConfig();
 
@@ -71,7 +71,7 @@ export async function requestOpenai(req: NextRequest) {
         .filter((v) => !!v && !v.startsWith("-") && v.includes(modelName))
         .forEach((m) => {
           const [fullName, displayName] = m.split("=");
-          const [_, providerName] = fullName.split("@");
+          const [_, providerName] = getModelProvider(fullName);
           if (providerName === "azure" && !displayName) {
             const [_, deployId] = (serverConfig?.azureUrl ?? "").split(
               "deployments/",
diff --git a/app/api/openai.ts b/app/api/openai.ts
index bbba69e569c..7be7994d056 100644
--- a/app/api/openai.ts
+++ b/app/api/openai.ts
@@ -14,8 +14,11 @@ function getModels(remoteModelRes: OpenAIListModelResponse) {
   if (config.disableGPT4) {
     remoteModelRes.data = remoteModelRes.data.filter(
       (m) =>
-        !(m.id.startsWith("gpt-4") || m.id.startsWith("chatgpt-4o")) ||
-        m.id.startsWith("gpt-4o-mini"),
+        !(
+          m.id.startsWith("gpt-4") ||
+          m.id.startsWith("chatgpt-4o") ||
+          m.id.startsWith("o1")
+        ) || m.id.startsWith("gpt-4o-mini"),
     );
   }
 
diff --git a/app/api/proxy.ts b/app/api/proxy.ts
index 731003aa1ea..d75db84b6f9 100644
--- a/app/api/proxy.ts
+++ b/app/api/proxy.ts
@@ -1,4 +1,5 @@
 import { NextRequest, NextResponse } from "next/server";
+import { getServerSideConfig } from "@/app/config/server";
 
 export async function handle(
   req: NextRequest,
@@ -9,6 +10,7 @@ export async function handle(
   if (req.method === "OPTIONS") {
     return NextResponse.json({ body: "OK" }, { status: 200 });
   }
+  const serverConfig = getServerSideConfig();
 
   // remove path params from searchParams
   req.nextUrl.searchParams.delete("path");
@@ -31,6 +33,18 @@ export async function handle(
       return true;
     }),
   );
+  // Attach the server-side OpenAI key (used for DALL-E 3 per the original note) only when x-base-url is exactly the OpenAI HTTPS origin; a substring match would also send the key to hosts such as https://evil.example/api.openai.com
+  const baseUrl = req.headers.get("x-base-url");
+  if (baseUrl && /^https:\/\/api\.openai\.com(\/|$)/.test(baseUrl)) {
+    if (!serverConfig.apiKey) {
+      return NextResponse.json(
+        { error: "OpenAI API key not configured" },
+        { status: 500 },
+      );
+    }
+    headers.set("Authorization", `Bearer ${serverConfig.apiKey}`);
+  }
+
   const controller = new AbortController();
   const fetchOptions: RequestInit = {
     headers,
diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts
index 53ff00aeed0..a7bce4fc2d0 100644
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -29,7 +29,7 @@ import { RequestPayload } from "./openai";
 import { fetch } from "@/app/utils/stream";
 
 export class GeminiProApi implements LLMApi {
-  path(path: string): string {
+  path(path: string, shouldStream = false): string {
     const accessStore = useAccessStore.getState();
 
     let baseUrl = "";
@@ -51,8 +51,10 @@ export class GeminiProApi implements LLMApi {
     console.log("[Proxy Endpoint] ", baseUrl, path);
 
     let chatPath = [baseUrl, path].join("/");
+    if (shouldStream) {
+      chatPath += chatPath.includes("?") ? "&alt=sse" : "?alt=sse";
+    }
 
-    chatPath += chatPath.includes("?") ? "&alt=sse" : "?alt=sse";
     return chatPath;
   }
   extractMessage(res: any) {
@@ -60,6 +62,7 @@ export class GeminiProApi implements LLMApi {
 
     return (
       res?.candidates?.at(0)?.content?.parts.at(0)?.text ||
+      res?.at?.(0)?.candidates?.at(0)?.content?.parts.at(0)?.text || // `?.(` skips non-array bodies, which have no .at(), so the error fallback below is still reached
       res?.error?.message ||
       ""
     );
@@ -166,7 +169,10 @@ export class GeminiProApi implements LLMApi {
     options.onController?.(controller);
     try {
       // https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/Streaming_REST.ipynb
-      const chatPath = this.path(Google.ChatPath(modelConfig.model));
+      const chatPath = this.path(
+        Google.ChatPath(modelConfig.model),
+        shouldStream,
+      );
 
       const chatPayload = {
         method: "POST",
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 6e893ed148f..15cfb7ca602 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -65,6 +65,7 @@ export interface RequestPayload {
   frequency_penalty: number;
   top_p: number;
   max_tokens?: number;
+  max_completion_tokens?: number;
 }
 
 export interface DalleRequestPayload {
@@ -223,7 +224,7 @@ export class ChatGPTApi implements LLMApi {
       // O1 not support image, tools (plugin in ChatGPTNextWeb) and system, stream, logprobs, temperature, top_p, n, presence_penalty, frequency_penalty yet.
       requestPayload = {
         messages,
-        stream: !isO1 ? options.config.stream : false,
+        stream: options.config.stream,
         model: modelConfig.model,
         temperature: !isO1 ? modelConfig.temperature : 1,
         presence_penalty: !isO1 ? modelConfig.presence_penalty : 0,
@@ -233,6 +234,11 @@ export class ChatGPTApi implements LLMApi {
         // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
       };
 
+      // O1 uses max_completion_tokens to control the token count (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
+      if (isO1) {
+        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+      }
+
       // add max_tokens to vision model
       if (visionModel) {
         requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
@@ -241,7 +247,7 @@ export class ChatGPTApi implements LLMApi {
 
     console.log("[Request] openai payload: ", requestPayload);
 
-    const shouldStream = !isDalle3 && !!options.config.stream && !isO1;
+    const shouldStream = !isDalle3 && !!options.config.stream;
     const controller = new AbortController();
     options.onController?.(controller);
 
diff --git a/app/components/auth.tsx b/app/components/auth.tsx
index 539a52eecc9..5375bda3f70 100644
--- a/app/components/auth.tsx
+++ b/app/components/auth.tsx
@@ -18,6 +18,8 @@ import {
   trackSettingsPageGuideToCPaymentClick,
   trackAuthorizationPageButtonToCPaymentClick,
 } from "../utils/auth-settings-events";
+import clsx from "clsx";
+
 const storage = safeLocalStorage();
 
 export function AuthPage() {
@@ -54,7 +56,7 @@ export function AuthPage() {
           onClick={() => navigate(Path.Home)}
         ></IconButton>
       </div>
-      <div className={`no-dark ${styles["auth-logo"]}`}>
+      <div className={clsx("no-dark", styles["auth-logo"])}>
         <BotIcon />
       </div>
 
@@ -163,7 +165,7 @@ function TopBanner() {
       onMouseEnter={handleMouseEnter}
       onMouseLeave={handleMouseLeave}
     >
-      <div className={`${styles["top-banner-inner"]} no-dark`}>
+      <div className={clsx(styles["top-banner-inner"], "no-dark")}>
         <Logo className={styles["top-banner-logo"]}></Logo>
         <span>
           {Locale.Auth.TopTips}
diff --git a/app/components/button.tsx b/app/components/button.tsx
index 87b4abd30f9..157d5d73da5 100644
--- a/app/components/button.tsx
+++ b/app/components/button.tsx
@@ -2,6 +2,7 @@ import * as React from "react";
 
 import styles from "./button.module.scss";
 import { CSSProperties } from "react";
+import clsx from "clsx";
 
 export type ButtonType = "primary" | "danger" | null;
 
@@ -22,12 +23,16 @@ export function IconButton(props: {
 }) {
   return (
     <button
-      className={
-        styles["icon-button"] +
-        ` ${props.bordered && styles.border} ${props.shadow && styles.shadow} ${
-          props.className ?? ""
-        } clickable ${styles[props.type ?? ""]}`
-      }
+      className={clsx(
+        "clickable",
+        styles["icon-button"],
+        {
+          [styles.border]: props.bordered,
+          [styles.shadow]: props.shadow,
+        },
+        styles[props.type ?? ""],
+        props.className,
+      )}
       onClick={props.onClick}
       title={props.title}
       disabled={props.disabled}
@@ -40,10 +45,9 @@ export function IconButton(props: {
       {props.icon && (
         <div
           aria-label={props.text || props.title}
-          className={
-            styles["icon-button-icon"] +
-            ` ${props.type === "primary" && "no-dark"}`
-          }
+          className={clsx(styles["icon-button-icon"], {
+            "no-dark": props.type === "primary",
+          })}
         >
           {props.icon}
         </div>
diff --git a/app/components/chat-list.tsx b/app/components/chat-list.tsx
index 03b1a5c8803..63dc4d5ff30 100644
--- a/app/components/chat-list.tsx
+++ b/app/components/chat-list.tsx
@@ -18,6 +18,7 @@ import { Mask } from "../store/mask";
 import { useRef, useEffect } from "react";
 import { showConfirm } from "./ui-lib";
 import { useMobileScreen } from "../utils";
+import clsx from "clsx";
 
 export function ChatItem(props: {
   onClick?: () => void;
@@ -45,11 +46,11 @@ export function ChatItem(props: {
     <Draggable draggableId={`${props.id}`} index={props.index}>
       {(provided) => (
         <div
-          className={`${styles["chat-item"]} ${
-            props.selected &&
-            (currentPath === Path.Chat || currentPath === Path.Home) &&
-            styles["chat-item-selected"]
-          }`}
+          className={clsx(styles["chat-item"], {
+            [styles["chat-item-selected"]]:
+              props.selected &&
+              (currentPath === Path.Chat || currentPath === Path.Home),
+          })}
           onClick={props.onClick}
           ref={(ele) => {
             draggableRef.current = ele;
@@ -63,7 +64,7 @@ export function ChatItem(props: {
         >
           {props.narrow ? (
             <div className={styles["chat-item-narrow"]}>
-              <div className={styles["chat-item-avatar"] + " no-dark"}>
+              <div className={clsx(styles["chat-item-avatar"], "no-dark")}>
                 <MaskAvatar
                   avatar={props.mask.avatar}
                   model={props.mask.modelConfig.model}
diff --git a/app/components/chat.module.scss b/app/components/chat.module.scss
index 73542fc67f1..7560d030533 100644
--- a/app/components/chat.module.scss
+++ b/app/components/chat.module.scss
@@ -45,6 +45,14 @@
 .chat-input-actions {
   display: flex;
   flex-wrap: wrap;
+  justify-content: space-between;
+  gap: 5px;
+
+  &-end {
+    display: flex;
+    margin-left: auto;
+    gap: 5px;
+  }
 
   .chat-input-action {
     display: inline-flex;
@@ -62,10 +70,6 @@
     width: var(--icon-width);
     overflow: hidden;
 
-    &:not(:last-child) {
-      margin-right: 5px;
-    }
-
     .text {
       white-space: nowrap;
       padding-left: 5px;
@@ -231,10 +235,12 @@
 
   animation: slide-in ease 0.3s;
 
-  $linear: linear-gradient(to right,
-      rgba(0, 0, 0, 0),
-      rgba(0, 0, 0, 1),
-      rgba(0, 0, 0, 0));
+  $linear: linear-gradient(
+    to right,
+    rgba(0, 0, 0, 0),
+    rgba(0, 0, 0, 1),
+    rgba(0, 0, 0, 0)
+  );
   mask-image: $linear;
 
   @mixin show {
@@ -373,7 +379,7 @@
   }
 }
 
-.chat-message-user>.chat-message-container {
+.chat-message-user > .chat-message-container {
   align-items: flex-end;
 }
 
@@ -443,6 +449,25 @@
   transition: all ease 0.3s;
 }
 
+.chat-message-audio {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  border-radius: 10px;
+  background-color: rgba(0, 0, 0, 0.05);
+  border: var(--border-in-light);
+  position: relative;
+  transition: all ease 0.3s;
+  margin-top: 10px;
+  font-size: 14px;
+  user-select: text;
+  word-break: break-word;
+  box-sizing: border-box;
+  audio {
+    height: 30px; /* adjust height */
+  }
+}
+
 .chat-message-item-image {
   width: 100%;
   margin-top: 10px;
@@ -471,23 +496,27 @@
   border: rgba($color: #888, $alpha: 0.2) 1px solid;
 }
 
-
 @media only screen and (max-width: 600px) {
-  $calc-image-width: calc(100vw/3*2/var(--image-count));
+  $calc-image-width: calc(100vw / 3 * 2 / var(--image-count));
 
   .chat-message-item-image-multi {
     width: $calc-image-width;
     height: $calc-image-width;
   }
-  
+
   .chat-message-item-image {
-    max-width: calc(100vw/3*2);
+    max-width: calc(100vw / 3 * 2);
   }
 }
 
 @media screen and (min-width: 600px) {
-  $max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count));
-  $image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count));
+  $max-image-width: calc(
+    calc(1200px - var(--sidebar-width)) / 3 * 2 / var(--image-count)
+  );
+  $image-width: calc(
+    calc(var(--window-width) - var(--sidebar-width)) / 3 * 2 /
+      var(--image-count)
+  );
 
   .chat-message-item-image-multi {
     width: $image-width;
@@ -497,7 +526,7 @@
   }
 
   .chat-message-item-image {
-    max-width: calc(calc(1200px - var(--sidebar-width))/3*2);
+    max-width: calc(calc(1200px - var(--sidebar-width)) / 3 * 2);
   }
 }
 
@@ -515,7 +544,7 @@
   z-index: 1;
 }
 
-.chat-message-user>.chat-message-container>.chat-message-item {
+.chat-message-user > .chat-message-container > .chat-message-item {
   background-color: var(--second);
 
   &:hover {
@@ -626,7 +655,8 @@
   min-height: 68px;
 }
 
-.chat-input:focus {}
+.chat-input:focus {
+}
 
 .chat-input-send {
   background-color: var(--primary);
@@ -693,4 +723,31 @@
 .shortcut-key span {
   font-size: 12px;
   color: var(--black);
-}
\ No newline at end of file
+}
+
+.chat-main {
+  display: flex;
+  height: 100%;
+  width: 100%;
+  position: relative;
+  overflow: hidden;
+  .chat-body-container {
+    height: 100%;
+    display: flex;
+    flex-direction: column;
+    flex: 1;
+    width: 100%;
+  }
+  .chat-side-panel {
+    position: absolute;
+    inset: 0;
+    background: var(--white);
+    overflow: hidden;
+    z-index: 10;
+    transform: translateX(100%);
+    transition: all ease 0.3s;
+    &-show {
+      transform: translateX(0);
+    }
+  }
+}
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index bf09c3f7eaa..197fcc20aba 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -46,7 +46,7 @@ import StyleIcon from "../icons/palette.svg";
 import PluginIcon from "../icons/plugin.svg";
 import ShortcutkeyIcon from "../icons/shortcutkey.svg";
 import ReloadIcon from "../icons/reload.svg";
-
+import HeadphoneIcon from "../icons/headphone.svg";
 import {
   ChatMessage,
   SubmitKey,
@@ -121,6 +121,9 @@ import { createTTSPlayer } from "../utils/audio";
 import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
 
 import { isEmpty } from "lodash-es";
+import { getModelProvider } from "../utils/model";
+import { RealtimeChat } from "@/app/components/realtime-chat";
+import clsx from "clsx";
 
 const localStorage = safeLocalStorage();
 
@@ -149,7 +152,8 @@ export function SessionConfigModel(props: { onClose: () => void }) {
             text={Locale.Chat.Config.Reset}
             onClick={async () => {
               if (await showConfirm(Locale.Memory.ResetConfirm)) {
-                chatStore.updateCurrentSession(
+                chatStore.updateTargetSession(
+                  session,
                   (session) => (session.memoryPrompt = ""),
                 );
               }
@@ -174,7 +178,10 @@ export function SessionConfigModel(props: { onClose: () => void }) {
           updateMask={(updater) => {
             const mask = { ...session.mask };
             updater(mask);
-            chatStore.updateCurrentSession((session) => (session.mask = mask));
+            chatStore.updateTargetSession(
+              session,
+              (session) => (session.mask = mask),
+            );
           }}
           shouldSyncFromGlobal
           extraListItems={
@@ -207,7 +214,7 @@ function PromptToast(props: {
     <div className={styles["prompt-toast"]} key="prompt-toast">
       {props.showToast && context.length > 0 && (
         <div
-          className={styles["prompt-toast-inner"] + " clickable"}
+          className={clsx(styles["prompt-toast-inner"], "clickable")}
           role="button"
           onClick={() => props.setShowModal(true)}
         >
@@ -328,10 +335,9 @@ export function PromptHints(props: {
       {props.prompts.map((prompt, i) => (
         <div
           ref={i === selectIndex ? selectedRef : null}
-          className={
-            styles["prompt-hint"] +
-            ` ${i === selectIndex ? styles["prompt-hint-selected"] : ""}`
-          }
+          className={clsx(styles["prompt-hint"], {
+            [styles["prompt-hint-selected"]]: i === selectIndex,
+          })}
           key={prompt.title + i.toString()}
           onClick={() => props.onPromptSelect(prompt)}
           onMouseEnter={() => setSelectIndex(i)}
@@ -346,12 +352,14 @@ export function PromptHints(props: {
 
 function ClearContextDivider() {
   const chatStore = useChatStore();
+  const session = chatStore.currentSession();
 
   return (
     <div
       className={styles["clear-context"]}
       onClick={() =>
-        chatStore.updateCurrentSession(
+        chatStore.updateTargetSession(
+          session,
           (session) => (session.clearContextIndex = undefined),
         )
       }
@@ -389,7 +397,7 @@ export function ChatAction(props: {
 
   return (
     <div
-      className={`${styles["chat-input-action"]} clickable`}
+      className={clsx(styles["chat-input-action"], "clickable")}
       onClick={() => {
         props.onClick();
         setTimeout(updateWidth, 1);
@@ -456,11 +464,13 @@ export function ChatActions(props: {
   uploading: boolean;
   setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
   setUserInput: (input: string) => void;
+  setShowChatSidePanel: React.Dispatch<React.SetStateAction<boolean>>;
 }) {
   const config = useAppConfig();
   const navigate = useNavigate();
   const chatStore = useChatStore();
   const pluginStore = usePluginStore();
+  const session = chatStore.currentSession();
 
   // switch themes
   const theme = config.theme;
@@ -477,10 +487,9 @@ export function ChatActions(props: {
   const stopAll = () => ChatControllerPool.stopAll();
 
   // switch model
-  const currentModel = chatStore.currentSession().mask.modelConfig.model;
+  const currentModel = session.mask.modelConfig.model;
   const currentProviderName =
-    chatStore.currentSession().mask.modelConfig?.providerName ||
-    ServiceProvider.OpenAI;
+    session.mask.modelConfig?.providerName || ServiceProvider.OpenAI;
   const allModels = useAllModels();
   const models = useMemo(() => {
     const filteredModels = allModels.filter((m) => m.available);
@@ -514,12 +523,9 @@ export function ChatActions(props: {
   const dalle3Sizes: DalleSize[] = ["1024x1024", "1792x1024", "1024x1792"];
   const dalle3Qualitys: DalleQuality[] = ["standard", "hd"];
   const dalle3Styles: DalleStyle[] = ["vivid", "natural"];
-  const currentSize =
-    chatStore.currentSession().mask.modelConfig?.size ?? "1024x1024";
-  const currentQuality =
-    chatStore.currentSession().mask.modelConfig?.quality ?? "standard";
-  const currentStyle =
-    chatStore.currentSession().mask.modelConfig?.style ?? "vivid";
+  const currentSize = session.mask.modelConfig?.size ?? "1024x1024";
+  const currentQuality = session.mask.modelConfig?.quality ?? "standard";
+  const currentStyle = session.mask.modelConfig?.style ?? "vivid";
 
   const isMobileScreen = useMobileScreen();
 
@@ -537,7 +543,7 @@ export function ChatActions(props: {
     if (isUnavailableModel && models.length > 0) {
       // show next model to default model if exist
       let nextModel = models.find((model) => model.isDefault) || models[0];
-      chatStore.updateCurrentSession((session) => {
+      chatStore.updateTargetSession(session, (session) => {
         session.mask.modelConfig.model = nextModel.name;
         session.mask.modelConfig.providerName = nextModel?.provider
           ?.providerName as ServiceProvider;
@@ -548,242 +554,254 @@ export function ChatActions(props: {
           : nextModel.name,
       );
     }
-  }, [chatStore, currentModel, models]);
+  }, [chatStore, currentModel, models, session]);
 
   return (
     <div className={styles["chat-input-actions"]}>
-      {couldStop && (
-        <ChatAction
-          onClick={stopAll}
-          text={Locale.Chat.InputActions.Stop}
-          icon={<StopIcon />}
-        />
-      )}
-      {!props.hitBottom && (
+      <>
+        {couldStop && (
+          <ChatAction
+            onClick={stopAll}
+            text={Locale.Chat.InputActions.Stop}
+            icon={<StopIcon />}
+          />
+        )}
+        {!props.hitBottom && (
+          <ChatAction
+            onClick={props.scrollToBottom}
+            text={Locale.Chat.InputActions.ToBottom}
+            icon={<BottomIcon />}
+          />
+        )}
+        {props.hitBottom && (
+          <ChatAction
+            onClick={props.showPromptModal}
+            text={Locale.Chat.InputActions.Settings}
+            icon={<SettingsIcon />}
+          />
+        )}
+
+        {showUploadImage && (
+          <ChatAction
+            onClick={props.uploadImage}
+            text={Locale.Chat.InputActions.UploadImage}
+            icon={props.uploading ? <LoadingButtonIcon /> : <ImageIcon />}
+          />
+        )}
         <ChatAction
-          onClick={props.scrollToBottom}
-          text={Locale.Chat.InputActions.ToBottom}
-          icon={<BottomIcon />}
+          onClick={nextTheme}
+          text={Locale.Chat.InputActions.Theme[theme]}
+          icon={
+            <>
+              {theme === Theme.Auto ? (
+                <AutoIcon />
+              ) : theme === Theme.Light ? (
+                <LightIcon />
+              ) : theme === Theme.Dark ? (
+                <DarkIcon />
+              ) : null}
+            </>
+          }
         />
-      )}
-      {props.hitBottom && (
+
         <ChatAction
-          onClick={props.showPromptModal}
-          text={Locale.Chat.InputActions.Settings}
-          icon={<SettingsIcon />}
+          onClick={props.showPromptHints}
+          text={Locale.Chat.InputActions.Prompt}
+          icon={<PromptIcon />}
         />
-      )}
 
-      {showUploadImage && (
         <ChatAction
-          onClick={props.uploadImage}
-          text={Locale.Chat.InputActions.UploadImage}
-          icon={props.uploading ? <LoadingButtonIcon /> : <ImageIcon />}
-        />
-      )}
-      <ChatAction
-        onClick={nextTheme}
-        text={Locale.Chat.InputActions.Theme[theme]}
-        icon={
-          <>
-            {theme === Theme.Auto ? (
-              <AutoIcon />
-            ) : theme === Theme.Light ? (
-              <LightIcon />
-            ) : theme === Theme.Dark ? (
-              <DarkIcon />
-            ) : null}
-          </>
-        }
-      />
-
-      <ChatAction
-        onClick={props.showPromptHints}
-        text={Locale.Chat.InputActions.Prompt}
-        icon={<PromptIcon />}
-      />
-
-      <ChatAction
-        onClick={() => {
-          navigate(Path.Masks);
-        }}
-        text={Locale.Chat.InputActions.Masks}
-        icon={<MaskIcon />}
-      />
-
-      <ChatAction
-        text={Locale.Chat.InputActions.Clear}
-        icon={<BreakIcon />}
-        onClick={() => {
-          chatStore.updateCurrentSession((session) => {
-            if (session.clearContextIndex === session.messages.length) {
-              session.clearContextIndex = undefined;
-            } else {
-              session.clearContextIndex = session.messages.length;
-              session.memoryPrompt = ""; // will clear memory
-            }
-          });
-        }}
-      />
-
-      <ChatAction
-        onClick={() => setShowModelSelector(true)}
-        text={currentModelName}
-        icon={<RobotIcon />}
-      />
-
-      {showModelSelector && (
-        <Selector
-          defaultSelectedValue={`${currentModel}@${currentProviderName}`}
-          items={models.map((m) => ({
-            title: `${m.displayName}${
-              m?.provider?.providerName
-                ? " (" + m?.provider?.providerName + ")"
-                : ""
-            }`,
-            value: `${m.name}@${m?.provider?.providerName}`,
-          }))}
-          onClose={() => setShowModelSelector(false)}
-          onSelection={(s) => {
-            if (s.length === 0) return;
-            const [model, providerName] = s[0].split("@");
-            chatStore.updateCurrentSession((session) => {
-              session.mask.modelConfig.model = model as ModelType;
-              session.mask.modelConfig.providerName =
-                providerName as ServiceProvider;
-              session.mask.syncGlobalConfig = false;
-            });
-            if (providerName == "ByteDance") {
-              const selectedModel = models.find(
-                (m) =>
-                  m.name == model && m?.provider?.providerName == providerName,
-              );
-              showToast(selectedModel?.displayName ?? "");
-            } else {
-              showToast(model);
-            }
+          onClick={() => {
+            navigate(Path.Masks);
           }}
+          text={Locale.Chat.InputActions.Masks}
+          icon={<MaskIcon />}
         />
-      )}
 
-      {isDalle3(currentModel) && (
         <ChatAction
-          onClick={() => setShowSizeSelector(true)}
-          text={currentSize}
-          icon={<SizeIcon />}
-        />
-      )}
-
-      {showSizeSelector && (
-        <Selector
-          defaultSelectedValue={currentSize}
-          items={dalle3Sizes.map((m) => ({
-            title: m,
-            value: m,
-          }))}
-          onClose={() => setShowSizeSelector(false)}
-          onSelection={(s) => {
-            if (s.length === 0) return;
-            const size = s[0];
-            chatStore.updateCurrentSession((session) => {
-              session.mask.modelConfig.size = size;
+          text={Locale.Chat.InputActions.Clear}
+          icon={<BreakIcon />}
+          onClick={() => {
+            chatStore.updateTargetSession(session, (session) => {
+              if (session.clearContextIndex === session.messages.length) {
+                session.clearContextIndex = undefined;
+              } else {
+                session.clearContextIndex = session.messages.length;
+                session.memoryPrompt = ""; // will clear memory
+              }
             });
-            showToast(size);
           }}
         />
-      )}
 
-      {isDalle3(currentModel) && (
         <ChatAction
-          onClick={() => setShowQualitySelector(true)}
-          text={currentQuality}
-          icon={<QualityIcon />}
+          onClick={() => setShowModelSelector(true)}
+          text={currentModelName}
+          icon={<RobotIcon />}
         />
-      )}
 
-      {showQualitySelector && (
-        <Selector
-          defaultSelectedValue={currentQuality}
-          items={dalle3Qualitys.map((m) => ({
-            title: m,
-            value: m,
-          }))}
-          onClose={() => setShowQualitySelector(false)}
-          onSelection={(q) => {
-            if (q.length === 0) return;
-            const quality = q[0];
-            chatStore.updateCurrentSession((session) => {
-              session.mask.modelConfig.quality = quality;
-            });
-            showToast(quality);
-          }}
-        />
-      )}
+        {showModelSelector && (
+          <Selector
+            defaultSelectedValue={`${currentModel}@${currentProviderName}`}
+            items={models.map((m) => ({
+              title: `${m.displayName}${
+                m?.provider?.providerName
+                  ? " (" + m?.provider?.providerName + ")"
+                  : ""
+              }`,
+              value: `${m.name}@${m?.provider?.providerName}`,
+            }))}
+            onClose={() => setShowModelSelector(false)}
+            onSelection={(s) => {
+              if (s.length === 0) return;
+              const [model, providerName] = getModelProvider(s[0]);
+              chatStore.updateTargetSession(session, (session) => {
+                session.mask.modelConfig.model = model as ModelType;
+                session.mask.modelConfig.providerName =
+                  providerName as ServiceProvider;
+                session.mask.syncGlobalConfig = false;
+              });
+              if (providerName == "ByteDance") {
+                const selectedModel = models.find(
+                  (m) =>
+                    m.name == model &&
+                    m?.provider?.providerName == providerName,
+                );
+                showToast(selectedModel?.displayName ?? "");
+              } else {
+                showToast(model);
+              }
+            }}
+          />
+        )}
 
-      {isDalle3(currentModel) && (
-        <ChatAction
-          onClick={() => setShowStyleSelector(true)}
-          text={currentStyle}
-          icon={<StyleIcon />}
-        />
-      )}
+        {isDalle3(currentModel) && (
+          <ChatAction
+            onClick={() => setShowSizeSelector(true)}
+            text={currentSize}
+            icon={<SizeIcon />}
+          />
+        )}
 
-      {showStyleSelector && (
-        <Selector
-          defaultSelectedValue={currentStyle}
-          items={dalle3Styles.map((m) => ({
-            title: m,
-            value: m,
-          }))}
-          onClose={() => setShowStyleSelector(false)}
-          onSelection={(s) => {
-            if (s.length === 0) return;
-            const style = s[0];
-            chatStore.updateCurrentSession((session) => {
-              session.mask.modelConfig.style = style;
-            });
-            showToast(style);
-          }}
-        />
-      )}
+        {showSizeSelector && (
+          <Selector
+            defaultSelectedValue={currentSize}
+            items={dalle3Sizes.map((m) => ({
+              title: m,
+              value: m,
+            }))}
+            onClose={() => setShowSizeSelector(false)}
+            onSelection={(s) => {
+              if (s.length === 0) return;
+              const size = s[0];
+              chatStore.updateTargetSession(session, (session) => {
+                session.mask.modelConfig.size = size;
+              });
+              showToast(size);
+            }}
+          />
+        )}
 
-      {showPlugins(currentProviderName, currentModel) && (
-        <ChatAction
-          onClick={() => {
-            if (pluginStore.getAll().length == 0) {
-              navigate(Path.Plugins);
-            } else {
-              setShowPluginSelector(true);
-            }
-          }}
-          text={Locale.Plugin.Name}
-          icon={<PluginIcon />}
-        />
-      )}
-      {showPluginSelector && (
-        <Selector
-          multiple
-          defaultSelectedValue={chatStore.currentSession().mask?.plugin}
-          items={pluginStore.getAll().map((item) => ({
-            title: `${item?.title}@${item?.version}`,
-            value: item?.id,
-          }))}
-          onClose={() => setShowPluginSelector(false)}
-          onSelection={(s) => {
-            chatStore.updateCurrentSession((session) => {
-              session.mask.plugin = s as string[];
-            });
-          }}
-        />
-      )}
+        {isDalle3(currentModel) && (
+          <ChatAction
+            onClick={() => setShowQualitySelector(true)}
+            text={currentQuality}
+            icon={<QualityIcon />}
+          />
+        )}
 
-      {!isMobileScreen && (
-        <ChatAction
-          onClick={() => props.setShowShortcutKeyModal(true)}
-          text={Locale.Chat.ShortcutKey.Title}
-          icon={<ShortcutkeyIcon />}
-        />
-      )}
+        {showQualitySelector && (
+          <Selector
+            defaultSelectedValue={currentQuality}
+            items={dalle3Qualitys.map((m) => ({
+              title: m,
+              value: m,
+            }))}
+            onClose={() => setShowQualitySelector(false)}
+            onSelection={(q) => {
+              if (q.length === 0) return;
+              const quality = q[0];
+              chatStore.updateTargetSession(session, (session) => {
+                session.mask.modelConfig.quality = quality;
+              });
+              showToast(quality);
+            }}
+          />
+        )}
+
+        {isDalle3(currentModel) && (
+          <ChatAction
+            onClick={() => setShowStyleSelector(true)}
+            text={currentStyle}
+            icon={<StyleIcon />}
+          />
+        )}
+
+        {showStyleSelector && (
+          <Selector
+            defaultSelectedValue={currentStyle}
+            items={dalle3Styles.map((m) => ({
+              title: m,
+              value: m,
+            }))}
+            onClose={() => setShowStyleSelector(false)}
+            onSelection={(s) => {
+              if (s.length === 0) return;
+              const style = s[0];
+              chatStore.updateTargetSession(session, (session) => {
+                session.mask.modelConfig.style = style;
+              });
+              showToast(style);
+            }}
+          />
+        )}
+
+        {showPlugins(currentProviderName, currentModel) && (
+          <ChatAction
+            onClick={() => {
+              if (pluginStore.getAll().length == 0) {
+                navigate(Path.Plugins);
+              } else {
+                setShowPluginSelector(true);
+              }
+            }}
+            text={Locale.Plugin.Name}
+            icon={<PluginIcon />}
+          />
+        )}
+        {showPluginSelector && (
+          <Selector
+            multiple
+            defaultSelectedValue={chatStore.currentSession().mask?.plugin}
+            items={pluginStore.getAll().map((item) => ({
+              title: `${item?.title}@${item?.version}`,
+              value: item?.id,
+            }))}
+            onClose={() => setShowPluginSelector(false)}
+            onSelection={(s) => {
+              chatStore.updateTargetSession(session, (session) => {
+                session.mask.plugin = s as string[];
+              });
+            }}
+          />
+        )}
+
+        {!isMobileScreen && (
+          <ChatAction
+            onClick={() => props.setShowShortcutKeyModal(true)}
+            text={Locale.Chat.ShortcutKey.Title}
+            icon={<ShortcutkeyIcon />}
+          />
+        )}
+      </>
+      <div className={styles["chat-input-actions-end"]}>
+        {config.realtimeConfig.enable && (
+          <ChatAction
+            onClick={() => props.setShowChatSidePanel(true)}
+            text={"Realtime Chat"}
+            icon={<HeadphoneIcon />}
+          />
+        )}
+      </div>
     </div>
   );
 }
@@ -813,7 +831,8 @@ export function EditMessageModal(props: { onClose: () => void }) {
             icon={<ConfirmIcon />}
             key="ok"
             onClick={() => {
-              chatStore.updateCurrentSession(
+              chatStore.updateTargetSession(
+                session,
                 (session) => (session.messages = messages),
               );
               props.onClose();
@@ -830,7 +849,8 @@ export function EditMessageModal(props: { onClose: () => void }) {
               type="text"
               value={session.topic}
               onInput={(e) =>
-                chatStore.updateCurrentSession(
+                chatStore.updateTargetSession(
+                  session,
                   (session) => (session.topic = e.currentTarget.value),
                 )
               }
@@ -941,9 +961,24 @@ function _Chat() {
           (scrollRef.current.scrollTop + scrollRef.current.clientHeight),
       ) <= 1
     : false;
+  const isAttachWithTop = useMemo(() => {
+    const lastMessage = scrollRef.current?.lastElementChild as HTMLElement;
+    // if scrollRef is not ready or there is no message, return false
+    if (!scrollRef?.current || !lastMessage) return false;
+    const topDistance =
+      lastMessage!.getBoundingClientRect().top -
+      scrollRef.current.getBoundingClientRect().top;
+    // leave some space for user question
+    return topDistance < 100;
+  }, [scrollRef?.current?.scrollHeight]);
+
+  const isTyping = userInput !== "";
+
+  // if user is typing, should auto scroll to bottom
+  // if user is not typing, should auto scroll to bottom only if already at bottom
   const { setAutoScroll, scrollDomToBottom } = useScrollToBottom(
     scrollRef,
-    isScrolledToBottom,
+    (isScrolledToBottom || isAttachWithTop) && !isTyping,
   );
   const [hitBottom, setHitBottom] = useState(true);
   const isMobileScreen = useMobileScreen();
@@ -992,7 +1027,8 @@ function _Chat() {
     prev: () => chatStore.nextSession(-1),
     next: () => chatStore.nextSession(1),
     clear: () =>
-      chatStore.updateCurrentSession(
+      chatStore.updateTargetSession(
+        session,
         (session) => (session.clearContextIndex = session.messages.length),
       ),
     fork: () => chatStore.forkSession(),
@@ -1063,7 +1099,7 @@ function _Chat() {
   };
 
   useEffect(() => {
-    chatStore.updateCurrentSession((session) => {
+    chatStore.updateTargetSession(session, (session) => {
       const stopTiming = Date.now() - REQUEST_TIMEOUT_MS;
       session.messages.forEach((m) => {
         // check if should stop all stale messages
@@ -1089,7 +1125,7 @@ function _Chat() {
       }
     });
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, []);
+  }, [session]);
 
   // check if should send message
   const onInputKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
@@ -1120,7 +1156,7 @@ function _Chat() {
   };
 
   const deleteMessage = (msgId?: string) => {
-    chatStore.updateCurrentSession((session) => {
+    chatStore.updateTargetSession(session, (session) => {
       session.deletedMessageIds &&
         removeOutdatedEntries(session.deletedMessageIds);
       session.messages = session.messages.filter((m) => {
@@ -1197,7 +1233,7 @@ function _Chat() {
   };
 
   const onPinMessage = (message: ChatMessage) => {
-    chatStore.updateCurrentSession((session) =>
+    chatStore.updateTargetSession(session, (session) =>
       session.mask.context.push(message),
     );
 
@@ -1584,415 +1620,462 @@ function _Chat() {
     };
   }, [messages, chatStore, navigate]);
 
+  const [showChatSidePanel, setShowChatSidePanel] = useState(false);
+
   return (
-    <div className={styles.chat} key={session.id}>
-      <div className="window-header" data-tauri-drag-region>
-        {isMobileScreen && (
-          <div className="window-actions">
-            <div className={"window-action-button"}>
-              <IconButton
-                icon={<ReturnIcon />}
-                bordered
-                title={Locale.Chat.Actions.ChatList}
-                onClick={() => navigate(Path.Home)}
-              />
+    <>
+      <div className={styles.chat} key={session.id}>
+        <div className="window-header" data-tauri-drag-region>
+          {isMobileScreen && (
+            <div className="window-actions">
+              <div className={"window-action-button"}>
+                <IconButton
+                  icon={<ReturnIcon />}
+                  bordered
+                  title={Locale.Chat.Actions.ChatList}
+                  onClick={() => navigate(Path.Home)}
+                />
+              </div>
             </div>
-          </div>
-        )}
+          )}
 
-        <div className={`window-header-title ${styles["chat-body-title"]}`}>
           <div
-            className={`window-header-main-title ${styles["chat-body-main-title"]}`}
-            onClickCapture={() => setIsEditingMessage(true)}
+            className={clsx("window-header-title", styles["chat-body-title"])}
           >
-            {!session.topic ? DEFAULT_TOPIC : session.topic}
-          </div>
-          <div className="window-header-sub-title">
-            {Locale.Chat.SubTitle(session.messages.length)}
-          </div>
-        </div>
-        <div className="window-actions">
-          <div className="window-action-button">
-            <IconButton
-              icon={<ReloadIcon />}
-              bordered
-              title={Locale.Chat.Actions.RefreshTitle}
-              onClick={() => {
-                showToast(Locale.Chat.Actions.RefreshToast);
-                chatStore.summarizeSession(true);
-              }}
-            />
+            <div
+              className={clsx(
+                "window-header-main-title",
+                styles["chat-body-main-title"],
+              )}
+              onClickCapture={() => setIsEditingMessage(true)}
+            >
+              {!session.topic ? DEFAULT_TOPIC : session.topic}
+            </div>
+            <div className="window-header-sub-title">
+              {Locale.Chat.SubTitle(session.messages.length)}
+            </div>
           </div>
-          {!isMobileScreen && (
+          <div className="window-actions">
             <div className="window-action-button">
               <IconButton
-                icon={<RenameIcon />}
+                icon={<ReloadIcon />}
                 bordered
-                title={Locale.Chat.EditMessage.Title}
-                aria={Locale.Chat.EditMessage.Title}
-                onClick={() => setIsEditingMessage(true)}
+                title={Locale.Chat.Actions.RefreshTitle}
+                onClick={() => {
+                  showToast(Locale.Chat.Actions.RefreshToast);
+                  chatStore.summarizeSession(true, session);
+                }}
               />
             </div>
-          )}
-          <div className="window-action-button">
-            <IconButton
-              icon={<ExportIcon />}
-              bordered
-              title={Locale.Chat.Actions.Export}
-              onClick={() => {
-                setShowExport(true);
-              }}
-            />
-          </div>
-          {showMaxIcon && (
+            {!isMobileScreen && (
+              <div className="window-action-button">
+                <IconButton
+                  icon={<RenameIcon />}
+                  bordered
+                  title={Locale.Chat.EditMessage.Title}
+                  aria={Locale.Chat.EditMessage.Title}
+                  onClick={() => setIsEditingMessage(true)}
+                />
+              </div>
+            )}
             <div className="window-action-button">
               <IconButton
-                icon={config.tightBorder ? <MinIcon /> : <MaxIcon />}
+                icon={<ExportIcon />}
                 bordered
-                title={Locale.Chat.Actions.FullScreen}
-                aria={Locale.Chat.Actions.FullScreen}
+                title={Locale.Chat.Actions.Export}
                 onClick={() => {
-                  config.update(
-                    (config) => (config.tightBorder = !config.tightBorder),
-                  );
+                  setShowExport(true);
                 }}
               />
             </div>
-          )}
-        </div>
+            {showMaxIcon && (
+              <div className="window-action-button">
+                <IconButton
+                  icon={config.tightBorder ? <MinIcon /> : <MaxIcon />}
+                  bordered
+                  title={Locale.Chat.Actions.FullScreen}
+                  aria={Locale.Chat.Actions.FullScreen}
+                  onClick={() => {
+                    config.update(
+                      (config) => (config.tightBorder = !config.tightBorder),
+                    );
+                  }}
+                />
+              </div>
+            )}
+          </div>
 
-        <PromptToast
-          showToast={!hitBottom}
-          showModal={showPromptModal}
-          setShowModal={setShowPromptModal}
-        />
-      </div>
+          <PromptToast
+            showToast={!hitBottom}
+            showModal={showPromptModal}
+            setShowModal={setShowPromptModal}
+          />
+        </div>
+        <div className={styles["chat-main"]}>
+          <div className={styles["chat-body-container"]}>
+            <div
+              className={styles["chat-body"]}
+              ref={scrollRef}
+              onScroll={(e) => onChatBodyScroll(e.currentTarget)}
+              onMouseDown={() => inputRef.current?.blur()}
+              onTouchStart={() => {
+                inputRef.current?.blur();
+                setAutoScroll(false);
+              }}
+            >
+              {messages.map((message, i) => {
+                const isUser = message.role === "user";
+                const isContext = i < context.length;
+                const showActions =
+                  i > 0 &&
+                  !(message.preview || message.content.length === 0) &&
+                  !isContext;
+                const showTyping = message.preview || message.streaming;
+
+                const shouldShowClearContextDivider =
+                  i === clearContextIndex - 1;
 
-      <div
-        className={styles["chat-body"]}
-        ref={scrollRef}
-        onScroll={(e) => onChatBodyScroll(e.currentTarget)}
-        onMouseDown={() => inputRef.current?.blur()}
-        onTouchStart={() => {
-          inputRef.current?.blur();
-          setAutoScroll(false);
-        }}
-      >
-        {messages.map((message, i) => {
-          const isUser = message.role === "user";
-          const isContext = i < context.length;
-          const showActions =
-            i > 0 &&
-            !(message.preview || message.content.length === 0) &&
-            !isContext;
-          const showTyping = message.preview || message.streaming;
-
-          const shouldShowClearContextDivider = i === clearContextIndex - 1;
-
-          return (
-            <Fragment key={message.id}>
-              <div
-                className={
-                  isUser ? styles["chat-message-user"] : styles["chat-message"]
-                }
-              >
-                <div className={styles["chat-message-container"]}>
-                  <div className={styles["chat-message-header"]}>
-                    <div className={styles["chat-message-avatar"]}>
-                      <div className={styles["chat-message-edit"]}>
-                        <IconButton
-                          icon={<EditIcon />}
-                          aria={Locale.Chat.Actions.Edit}
-                          onClick={async () => {
-                            const newMessage = await showPrompt(
-                              Locale.Chat.Actions.Edit,
-                              getMessageTextContent(message),
-                              10,
-                            );
-                            let newContent: string | MultimodalContent[] =
-                              newMessage;
-                            const images = getMessageImages(message);
-                            if (images.length > 0) {
-                              newContent = [{ type: "text", text: newMessage }];
-                              for (let i = 0; i < images.length; i++) {
-                                newContent.push({
-                                  type: "image_url",
-                                  image_url: {
-                                    url: images[i],
-                                  },
-                                });
-                              }
-                            }
-                            chatStore.updateCurrentSession((session) => {
-                              const m = session.mask.context
-                                .concat(session.messages)
-                                .find((m) => m.id === message.id);
-                              if (m) {
-                                m.content = newContent;
-                              }
-                            });
-                          }}
-                        ></IconButton>
-                      </div>
-                      {isUser ? (
-                        <Avatar avatar={config.avatar} />
-                      ) : (
-                        <>
-                          {["system"].includes(message.role) ? (
-                            <Avatar avatar="2699-fe0f" />
-                          ) : (
-                            <MaskAvatar
-                              avatar={session.mask.avatar}
-                              model={
-                                message.model || session.mask.modelConfig.model
-                              }
-                            />
-                          )}
-                        </>
-                      )}
-                    </div>
-                    {!isUser && (
-                      <div className={styles["chat-model-name"]}>
-                        {message.model}
-                      </div>
-                    )}
-
-                    {showActions && (
-                      <div className={styles["chat-message-actions"]}>
-                        <div className={styles["chat-input-actions"]}>
-                          {message.streaming ? (
-                            <ChatAction
-                              text={Locale.Chat.Actions.Stop}
-                              icon={<StopIcon />}
-                              onClick={() => onUserStop(message.id ?? i)}
-                            />
-                          ) : (
-                            <>
-                              <ChatAction
-                                text={Locale.Chat.Actions.Retry}
-                                icon={<ResetIcon />}
-                                onClick={() => onResend(message)}
-                              />
-
-                              <ChatAction
-                                text={Locale.Chat.Actions.Delete}
-                                icon={<DeleteIcon />}
-                                onClick={() => onDelete(message.id ?? i)}
-                              />
-
-                              <ChatAction
-                                text={Locale.Chat.Actions.Pin}
-                                icon={<PinIcon />}
-                                onClick={() => onPinMessage(message)}
-                              />
-                              <ChatAction
-                                text={Locale.Chat.Actions.Copy}
-                                icon={<CopyIcon />}
-                                onClick={() =>
-                                  copyToClipboard(
+                return (
+                  <Fragment key={message.id}>
+                    <div
+                      className={
+                        isUser
+                          ? styles["chat-message-user"]
+                          : styles["chat-message"]
+                      }
+                    >
+                      <div className={styles["chat-message-container"]}>
+                        <div className={styles["chat-message-header"]}>
+                          <div className={styles["chat-message-avatar"]}>
+                            <div className={styles["chat-message-edit"]}>
+                              <IconButton
+                                icon={<EditIcon />}
+                                aria={Locale.Chat.Actions.Edit}
+                                onClick={async () => {
+                                  const newMessage = await showPrompt(
+                                    Locale.Chat.Actions.Edit,
                                     getMessageTextContent(message),
-                                  )
-                                }
-                              />
-                              {config.ttsConfig.enable && (
-                                <ChatAction
-                                  text={
-                                    speechStatus
-                                      ? Locale.Chat.Actions.StopSpeech
-                                      : Locale.Chat.Actions.Speech
-                                  }
-                                  icon={
-                                    speechStatus ? (
-                                      <SpeakStopIcon />
-                                    ) : (
-                                      <SpeakIcon />
-                                    )
-                                  }
-                                  onClick={() =>
-                                    openaiSpeech(getMessageTextContent(message))
+                                    10,
+                                  );
+                                  let newContent: string | MultimodalContent[] =
+                                    newMessage;
+                                  const images = getMessageImages(message);
+                                  if (images.length > 0) {
+                                    newContent = [
+                                      { type: "text", text: newMessage },
+                                    ];
+                                    for (let i = 0; i < images.length; i++) {
+                                      newContent.push({
+                                        type: "image_url",
+                                        image_url: {
+                                          url: images[i],
+                                        },
+                                      });
+                                    }
                                   }
-                                />
-                              )}
-                            </>
+                                  chatStore.updateTargetSession(
+                                    session,
+                                    (session) => {
+                                      const m = session.mask.context
+                                        .concat(session.messages)
+                                        .find((m) => m.id === message.id);
+                                      if (m) {
+                                        m.content = newContent;
+                                      }
+                                    },
+                                  );
+                                }}
+                              ></IconButton>
+                            </div>
+                            {isUser ? (
+                              <Avatar avatar={config.avatar} />
+                            ) : (
+                              <>
+                                {["system"].includes(message.role) ? (
+                                  <Avatar avatar="2699-fe0f" />
+                                ) : (
+                                  <MaskAvatar
+                                    avatar={session.mask.avatar}
+                                    model={
+                                      message.model ||
+                                      session.mask.modelConfig.model
+                                    }
+                                  />
+                                )}
+                              </>
+                            )}
+                          </div>
+                          {!isUser && (
+                            <div className={styles["chat-model-name"]}>
+                              {message.model}
+                            </div>
                           )}
-                        </div>
-                      </div>
-                    )}
-                  </div>
-                  {message?.tools?.length == 0 && showTyping && (
-                    <div className={styles["chat-message-status"]}>
-                      {Locale.Chat.Typing}
-                    </div>
-                  )}
-                  {/*@ts-ignore*/}
-                  {message?.tools?.length > 0 && (
-                    <div className={styles["chat-message-tools"]}>
-                      {message?.tools?.map((tool) => (
-                        <div
-                          key={tool.id}
-                          title={tool?.errorMsg}
-                          className={styles["chat-message-tool"]}
-                        >
-                          {tool.isError === false ? (
-                            <ConfirmIcon />
-                          ) : tool.isError === true ? (
-                            <CloseIcon />
-                          ) : (
-                            <LoadingButtonIcon />
+
+                          {showActions && (
+                            <div className={styles["chat-message-actions"]}>
+                              <div className={styles["chat-input-actions"]}>
+                                {message.streaming ? (
+                                  <ChatAction
+                                    text={Locale.Chat.Actions.Stop}
+                                    icon={<StopIcon />}
+                                    onClick={() => onUserStop(message.id ?? i)}
+                                  />
+                                ) : (
+                                  <>
+                                    <ChatAction
+                                      text={Locale.Chat.Actions.Retry}
+                                      icon={<ResetIcon />}
+                                      onClick={() => onResend(message)}
+                                    />
+
+                                    <ChatAction
+                                      text={Locale.Chat.Actions.Delete}
+                                      icon={<DeleteIcon />}
+                                      onClick={() => onDelete(message.id ?? i)}
+                                    />
+
+                                    <ChatAction
+                                      text={Locale.Chat.Actions.Pin}
+                                      icon={<PinIcon />}
+                                      onClick={() => onPinMessage(message)}
+                                    />
+                                    <ChatAction
+                                      text={Locale.Chat.Actions.Copy}
+                                      icon={<CopyIcon />}
+                                      onClick={() =>
+                                        copyToClipboard(
+                                          getMessageTextContent(message),
+                                        )
+                                      }
+                                    />
+                                    {config.ttsConfig.enable && (
+                                      <ChatAction
+                                        text={
+                                          speechStatus
+                                            ? Locale.Chat.Actions.StopSpeech
+                                            : Locale.Chat.Actions.Speech
+                                        }
+                                        icon={
+                                          speechStatus ? (
+                                            <SpeakStopIcon />
+                                          ) : (
+                                            <SpeakIcon />
+                                          )
+                                        }
+                                        onClick={() =>
+                                          openaiSpeech(
+                                            getMessageTextContent(message),
+                                          )
+                                        }
+                                      />
+                                    )}
+                                  </>
+                                )}
+                              </div>
+                            </div>
                           )}
-                          <span>{tool?.function?.name}</span>
                         </div>
-                      ))}
-                    </div>
-                  )}
-                  <div className={styles["chat-message-item"]}>
-                    <Markdown
-                      key={message.streaming ? "loading" : "done"}
-                      content={getMessageTextContent(message)}
-                      loading={
-                        (message.preview || message.streaming) &&
-                        message.content.length === 0 &&
-                        !isUser
-                      }
-                      //   onContextMenu={(e) => onRightClick(e, message)} // hard to use
-                      onDoubleClickCapture={() => {
-                        if (!isMobileScreen) return;
-                        setUserInput(getMessageTextContent(message));
-                      }}
-                      fontSize={fontSize}
-                      fontFamily={fontFamily}
-                      parentRef={scrollRef}
-                      defaultShow={i >= messages.length - 6}
-                    />
-                    {getMessageImages(message).length == 1 && (
-                      <img
-                        className={styles["chat-message-item-image"]}
-                        src={getMessageImages(message)[0]}
-                        alt=""
-                      />
-                    )}
-                    {getMessageImages(message).length > 1 && (
-                      <div
-                        className={styles["chat-message-item-images"]}
-                        style={
-                          {
-                            "--image-count": getMessageImages(message).length,
-                          } as React.CSSProperties
-                        }
-                      >
-                        {getMessageImages(message).map((image, index) => {
-                          return (
+                        {message?.tools?.length == 0 && showTyping && (
+                          <div className={styles["chat-message-status"]}>
+                            {Locale.Chat.Typing}
+                          </div>
+                        )}
+                        {/*@ts-ignore*/}
+                        {message?.tools?.length > 0 && (
+                          <div className={styles["chat-message-tools"]}>
+                            {message?.tools?.map((tool) => (
+                              <div
+                                key={tool.id}
+                                title={tool?.errorMsg}
+                                className={styles["chat-message-tool"]}
+                              >
+                                {tool.isError === false ? (
+                                  <ConfirmIcon />
+                                ) : tool.isError === true ? (
+                                  <CloseIcon />
+                                ) : (
+                                  <LoadingButtonIcon />
+                                )}
+                                <span>{tool?.function?.name}</span>
+                              </div>
+                            ))}
+                          </div>
+                        )}
+                        <div className={styles["chat-message-item"]}>
+                          <Markdown
+                            key={message.streaming ? "loading" : "done"}
+                            content={getMessageTextContent(message)}
+                            loading={
+                              (message.preview || message.streaming) &&
+                              message.content.length === 0 &&
+                              !isUser
+                            }
+                            //   onContextMenu={(e) => onRightClick(e, message)} // hard to use
+                            onDoubleClickCapture={() => {
+                              if (!isMobileScreen) return;
+                              setUserInput(getMessageTextContent(message));
+                            }}
+                            fontSize={fontSize}
+                            fontFamily={fontFamily}
+                            parentRef={scrollRef}
+                            defaultShow={i >= messages.length - 6}
+                          />
+                          {getMessageImages(message).length == 1 && (
                             <img
-                              className={
-                                styles["chat-message-item-image-multi"]
-                              }
-                              key={index}
-                              src={image}
+                              className={styles["chat-message-item-image"]}
+                              src={getMessageImages(message)[0]}
                               alt=""
                             />
-                          );
-                        })}
+                          )}
+                          {getMessageImages(message).length > 1 && (
+                            <div
+                              className={styles["chat-message-item-images"]}
+                              style={
+                                {
+                                  "--image-count":
+                                    getMessageImages(message).length,
+                                } as React.CSSProperties
+                              }
+                            >
+                              {getMessageImages(message).map((image, index) => {
+                                return (
+                                  <img
+                                    className={
+                                      styles["chat-message-item-image-multi"]
+                                    }
+                                    key={index}
+                                    src={image}
+                                    alt=""
+                                  />
+                                );
+                              })}
+                            </div>
+                          )}
+                        </div>
+                        {message?.audio_url && (
+                          <div className={styles["chat-message-audio"]}>
+                            <audio src={message.audio_url} controls />
+                          </div>
+                        )}
+
+                        <div className={styles["chat-message-action-date"]}>
+                          {isContext
+                            ? Locale.Chat.IsContext
+                            : message.date.toLocaleString()}
+                        </div>
                       </div>
-                    )}
-                  </div>
-
-                  <div className={styles["chat-message-action-date"]}>
-                    {isContext
-                      ? Locale.Chat.IsContext
-                      : message.date.toLocaleString()}
-                  </div>
-                </div>
-              </div>
-              {shouldShowClearContextDivider && <ClearContextDivider />}
-            </Fragment>
-          );
-        })}
-      </div>
-
-      <div className={styles["chat-input-panel"]}>
-        <PromptHints prompts={promptHints} onPromptSelect={onPromptSelect} />
-
-        <ChatActions
-          uploadImage={uploadImage}
-          setAttachImages={setAttachImages}
-          setUploading={setUploading}
-          showPromptModal={() => setShowPromptModal(true)}
-          scrollToBottom={scrollToBottom}
-          hitBottom={hitBottom}
-          uploading={uploading}
-          showPromptHints={() => {
-            // Click again to close
-            if (promptHints.length > 0) {
-              setPromptHints([]);
-              return;
-            }
-
-            inputRef.current?.focus();
-            setUserInput("/");
-            onSearch("");
-          }}
-          setShowShortcutKeyModal={setShowShortcutKeyModal}
-          setUserInput={setUserInput}
-        />
-        <label
-          className={`${styles["chat-input-panel-inner"]} ${
-            attachImages.length != 0
-              ? styles["chat-input-panel-inner-attach"]
-              : ""
-          }`}
-          htmlFor="chat-input"
-        >
-          <textarea
-            id="chat-input"
-            ref={inputRef}
-            className={styles["chat-input"]}
-            placeholder={Locale.Chat.Input(submitKey)}
-            onInput={(e) => onInput(e.currentTarget.value)}
-            value={userInput}
-            onKeyDown={onInputKeyDown}
-            onFocus={scrollToBottom}
-            onClick={scrollToBottom}
-            onPaste={handlePaste}
-            rows={inputRows}
-            autoFocus={autoFocus}
-            style={{
-              fontSize: config.fontSize,
-              fontFamily: config.fontFamily,
-            }}
-          />
-          {attachImages.length != 0 && (
-            <div className={styles["attach-images"]}>
-              {attachImages.map((image, index) => {
-                return (
-                  <div
-                    key={index}
-                    className={styles["attach-image"]}
-                    style={{ backgroundImage: `url("${image}")` }}
-                  >
-                    <div className={styles["attach-image-mask"]}>
-                      <DeleteImageButton
-                        deleteImage={() => {
-                          setAttachImages(
-                            attachImages.filter((_, i) => i !== index),
-                          );
-                        }}
-                      />
                     </div>
-                  </div>
+                    {shouldShowClearContextDivider && <ClearContextDivider />}
+                  </Fragment>
                 );
               })}
             </div>
-          )}
-          <IconButton
-            icon={<SendWhiteIcon />}
-            text={Locale.Chat.Send}
-            className={styles["chat-input-send"]}
-            type="primary"
-            onClick={() => doSubmit(userInput)}
-          />
-        </label>
-      </div>
+            <div className={styles["chat-input-panel"]}>
+              <PromptHints
+                prompts={promptHints}
+                onPromptSelect={onPromptSelect}
+              />
 
+              <ChatActions
+                uploadImage={uploadImage}
+                setAttachImages={setAttachImages}
+                setUploading={setUploading}
+                showPromptModal={() => setShowPromptModal(true)}
+                scrollToBottom={scrollToBottom}
+                hitBottom={hitBottom}
+                uploading={uploading}
+                showPromptHints={() => {
+                  // Click again to close
+                  if (promptHints.length > 0) {
+                    setPromptHints([]);
+                    return;
+                  }
+
+                  inputRef.current?.focus();
+                  setUserInput("/");
+                  onSearch("");
+                }}
+                setShowShortcutKeyModal={setShowShortcutKeyModal}
+                setUserInput={setUserInput}
+                setShowChatSidePanel={setShowChatSidePanel}
+              />
+              <label
+                className={clsx(styles["chat-input-panel-inner"], {
+                  [styles["chat-input-panel-inner-attach"]]:
+                    attachImages.length !== 0,
+                })}
+                htmlFor="chat-input"
+              >
+                <textarea
+                  id="chat-input"
+                  ref={inputRef}
+                  className={styles["chat-input"]}
+                  placeholder={Locale.Chat.Input(submitKey)}
+                  onInput={(e) => onInput(e.currentTarget.value)}
+                  value={userInput}
+                  onKeyDown={onInputKeyDown}
+                  onFocus={scrollToBottom}
+                  onClick={scrollToBottom}
+                  onPaste={handlePaste}
+                  rows={inputRows}
+                  autoFocus={autoFocus}
+                  style={{
+                    fontSize: config.fontSize,
+                    fontFamily: config.fontFamily,
+                  }}
+                />
+                {attachImages.length != 0 && (
+                  <div className={styles["attach-images"]}>
+                    {attachImages.map((image, index) => {
+                      return (
+                        <div
+                          key={index}
+                          className={styles["attach-image"]}
+                          style={{ backgroundImage: `url("${image}")` }}
+                        >
+                          <div className={styles["attach-image-mask"]}>
+                            <DeleteImageButton
+                              deleteImage={() => {
+                                setAttachImages(
+                                  attachImages.filter((_, i) => i !== index),
+                                );
+                              }}
+                            />
+                          </div>
+                        </div>
+                      );
+                    })}
+                  </div>
+                )}
+                <IconButton
+                  icon={<SendWhiteIcon />}
+                  text={Locale.Chat.Send}
+                  className={styles["chat-input-send"]}
+                  type="primary"
+                  onClick={() => doSubmit(userInput)}
+                />
+              </label>
+            </div>
+          </div>
+          <div
+            className={clsx(styles["chat-side-panel"], {
+              [styles["mobile"]]: isMobileScreen,
+              [styles["chat-side-panel-show"]]: showChatSidePanel,
+            })}
+          >
+            {showChatSidePanel && (
+              <RealtimeChat
+                onClose={() => {
+                  setShowChatSidePanel(false);
+                }}
+                onStartVoice={async () => {
+                  console.log("start voice");
+                }}
+              />
+            )}
+          </div>
+        </div>
+      </div>
       {showExport && (
         <ExportMessageModal onClose={() => setShowExport(false)} />
       )}
@@ -2008,12 +2091,12 @@ function _Chat() {
       {showShortcutKeyModal && (
         <ShortcutKeyModal onClose={() => setShowShortcutKeyModal(false)} />
       )}
-    </div>
+    </>
   );
 }
 
 export function Chat() {
   const chatStore = useChatStore();
-  const sessionIndex = chatStore.currentSessionIndex;
-  return <_Chat key={sessionIndex}></_Chat>;
+  const session = chatStore.currentSession(); // current session object; only its id is used below
+  return <_Chat key={session.id}></_Chat>; // keyed by session id (previously the session index), so a different id mounts a fresh _Chat
 }
diff --git a/app/components/emoji.tsx b/app/components/emoji.tsx
index 6db746c462c..d75cdda9268 100644
--- a/app/components/emoji.tsx
+++ b/app/components/emoji.tsx
@@ -37,7 +37,8 @@ export function Avatar(props: { model?: ModelType; avatar?: string }) {
     return (
       <div className="no-dark">
         {props.model?.startsWith("gpt-4") ||
-        props.model?.startsWith("chatgpt-4o") ? (
+        props.model?.startsWith("chatgpt-4o") ||
+        props.model?.startsWith("o1") ? (
           <BlackBotIcon className="user-avatar" />
         ) : (
           <BotIcon className="user-avatar" />
diff --git a/app/components/exporter.tsx b/app/components/exporter.tsx
index aba8dc54466..79ae87be2d2 100644
--- a/app/components/exporter.tsx
+++ b/app/components/exporter.tsx
@@ -40,6 +40,7 @@ import { EXPORT_MESSAGE_CLASS_NAME } from "../constant";
 import { getClientConfig } from "../config/client";
 import { type ClientApi, getClientApi } from "../client/api";
 import { getMessageTextContent } from "../utils";
+import clsx from "clsx";
 
 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => <LoadingIcon />,
@@ -118,9 +119,10 @@ function Steps<
           return (
             <div
               key={i}
-              className={`${styles["step"]} ${
-                styles[i <= props.index ? "step-finished" : ""]
-              } ${i === props.index && styles["step-current"]} clickable`}
+              className={clsx("clickable", styles["step"], {
+                [styles["step-finished"]]: i <= props.index,
+                [styles["step-current"]]: i === props.index,
+              })}
               onClick={() => {
                 props.onStepChange?.(i);
               }}
@@ -525,11 +527,11 @@ export function ImagePreviewer(props: {
         messages={props.messages}
       />
       <div
-        className={`${styles["preview-body"]} ${styles["default-theme"]}`}
+        className={clsx(styles["preview-body"], styles["default-theme"])}
         ref={previewRef}
       >
         <div className={styles["chat-info"]}>
-          <div className={styles["logo"] + " no-dark"}>
+          <div className={clsx(styles["logo"], "no-dark")}>
             <NextImage
               src={ChatGptIcon.src}
               alt="logo"
@@ -570,7 +572,7 @@ export function ImagePreviewer(props: {
         {props.messages.map((m, i) => {
           return (
             <div
-              className={styles["message"] + " " + styles["message-" + m.role]}
+              className={clsx(styles["message"], styles["message-" + m.role])}
               key={i}
             >
               <div className={styles["avatar"]}>
diff --git a/app/components/home.tsx b/app/components/home.tsx
index 465ad0f1ed1..5da49037885 100644
--- a/app/components/home.tsx
+++ b/app/components/home.tsx
@@ -3,7 +3,6 @@
 require("../polyfill");
 
 import { useState, useEffect } from "react";
-
 import styles from "./home.module.scss";
 
 import BotIcon from "../icons/bot.svg";
@@ -29,10 +28,11 @@ import { AuthPage } from "./auth";
 import { getClientConfig } from "../config/client";
 import { type ClientApi, getClientApi } from "../client/api";
 import { useAccessStore } from "../store";
+import clsx from "clsx";
 
 export function Loading(props: { noLogo?: boolean }) {
   return (
-    <div className={styles["loading-content"] + " no-dark"}>
+    <div className={clsx("no-dark", styles["loading-content"])}>
       {!props.noLogo && <BotIcon />}
       <LoadingIcon />
     </div>
@@ -179,7 +179,11 @@ function Screen() {
     if (isSdNew) return <Sd />;
     return (
       <>
-        <SideBar className={isHome ? styles["sidebar-show"] : ""} />
+        <SideBar
+          className={clsx({
+            [styles["sidebar-show"]]: isHome,
+          })}
+        />
         <WindowContent>
           <Routes>
             <Route path={Path.Home} element={<Chat />} />
@@ -197,9 +201,10 @@ function Screen() {
 
   return (
     <div
-      className={`${styles.container} ${
-        shouldTightBorder ? styles["tight-container"] : styles.container
-      } ${getLang() === "ar" ? styles["rtl-screen"] : ""}`}
+      className={clsx(styles.container, {
+        [styles["tight-container"]]: shouldTightBorder,
+        [styles["rtl-screen"]]: getLang() === "ar",
+      })}
     >
       {renderContent()}
     </div>
diff --git a/app/components/input-range.tsx b/app/components/input-range.tsx
index 08756e2c8d8..3c6b4982579 100644
--- a/app/components/input-range.tsx
+++ b/app/components/input-range.tsx
@@ -1,5 +1,6 @@
 import * as React from "react";
 import styles from "./input-range.module.scss";
+import clsx from "clsx";
 
 interface InputRangeProps {
   onChange: React.ChangeEventHandler<HTMLInputElement>;
@@ -23,7 +24,7 @@ export function InputRange({
   aria,
 }: InputRangeProps) {
   return (
-    <div className={styles["input-range"] + ` ${className ?? ""}`}>
+    <div className={clsx(styles["input-range"], className)}>
       {title || value}
       <input
         aria-label={aria}
diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx
index 9841a196d27..ba85f09701f 100644
--- a/app/components/markdown.tsx
+++ b/app/components/markdown.tsx
@@ -23,6 +23,7 @@ import { useChatStore } from "../store";
 import { IconButton } from "./button";
 
 import { useAppConfig } from "../store/config";
+import clsx from "clsx";
 
 export function Mermaid(props: { code: string }) {
   const ref = useRef<HTMLDivElement>(null);
@@ -57,7 +58,7 @@ export function Mermaid(props: { code: string }) {
 
   return (
     <div
-      className="no-dark mermaid"
+      className={clsx("no-dark", "mermaid")}
       style={{
         cursor: "pointer",
         overflow: "auto",
@@ -89,7 +90,11 @@ export function PreCode(props: { children: any }) {
     const refText = ref.current.querySelector("code")?.innerText;
     if (htmlDom) {
       setHtmlCode((htmlDom as HTMLElement).innerText);
-    } else if (refText?.startsWith("<!DOCTYPE")) {
+    } else if (
+      refText?.startsWith("<!DOCTYPE") ||
+      refText?.startsWith("<svg") ||
+      refText?.startsWith("<?xml")
+    ) {
       setHtmlCode(refText);
     }
   }, 600);
@@ -193,7 +198,12 @@ function CustomCode(props: { children: any; className?: string }) {
   const renderShowMoreButton = () => {
     if (showToggle && enableCodeFold && collapsed) {
       return (
-        <div className={`show-hide-button ${collapsed ? "collapsed" : "expanded"}`}>
+        <div
+          className={clsx("show-hide-button", {
+            collapsed,
+            expanded: !collapsed,
+          })}
+        >
           <button onClick={toggleCollapsed}>{Locale.NewChat.More}</button>
         </div>
       );
@@ -203,7 +213,7 @@ function CustomCode(props: { children: any; className?: string }) {
   return (
     <>
       <code
-        className={props?.className}
+        className={clsx(props?.className)}
         ref={ref}
         style={{
           maxHeight: enableCodeFold && collapsed ? "400px" : "none",
@@ -238,6 +248,10 @@ function escapeBrackets(text: string) {
 
 function tryWrapHtmlCode(text: string) {
   // try add wrap html code (fixed: html codeblock include 2 newline)
+  // leave the text untouched when it already contains a fenced code block
+  if (text.includes("```")) {
+    return text;
+  }
   return text
     .replace(
       /([`]*?)(\w*?)([\n\r]*?)(<!DOCTYPE html>)/g,
diff --git a/app/components/mask.tsx b/app/components/mask.tsx
index 12b19e33514..fa9537cbafa 100644
--- a/app/components/mask.tsx
+++ b/app/components/mask.tsx
@@ -55,6 +55,7 @@ import {
   OnDragEndResponder,
 } from "@hello-pangea/dnd";
 import { getMessageTextContent } from "../utils";
+import clsx from "clsx";
 
 // drag and drop helper function
 function reorder<T>(list: T[], startIndex: number, endIndex: number): T[] {
@@ -588,7 +589,7 @@ export function MaskPage() {
                   </div>
                   <div className={styles["mask-title"]}>
                     <div className={styles["mask-name"]}>{m.name}</div>
-                    <div className={styles["mask-info"] + " one-line"}>
+                    <div className={clsx(styles["mask-info"], "one-line")}>
                       {`${Locale.Mask.Item.Info(m.context.length)} / ${
                         ALL_LANG_OPTIONS[m.lang]
                       } / ${m.modelConfig.model}`}
diff --git a/app/components/message-selector.tsx b/app/components/message-selector.tsx
index 8198a3cd4eb..8dc994a4546 100644
--- a/app/components/message-selector.tsx
+++ b/app/components/message-selector.tsx
@@ -8,6 +8,7 @@ import Locale from "../locales";
 
 import styles from "./message-selector.module.scss";
 import { getMessageTextContent } from "../utils";
+import clsx from "clsx";
 
 function useShiftRange() {
   const [startIndex, setStartIndex] = useState<number>();
@@ -71,6 +72,7 @@ export function MessageSelector(props: {
   defaultSelectAll?: boolean;
   onSelected?: (messages: ChatMessage[]) => void;
 }) {
+  const LATEST_COUNT = 4;
   const chatStore = useChatStore();
   const session = chatStore.currentSession();
   const isValid = (m: ChatMessage) => m.content && !m.isError && !m.streaming;
@@ -141,15 +143,13 @@ export function MessageSelector(props: {
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [startIndex, endIndex]);
 
-  const LATEST_COUNT = 4;
-
   return (
     <div className={styles["message-selector"]}>
       <div className={styles["message-filter"]}>
         <input
           type="text"
           placeholder={Locale.Select.Search}
-          className={styles["filter-item"] + " " + styles["search-bar"]}
+          className={clsx(styles["filter-item"], styles["search-bar"])}
           value={searchInput}
           onInput={(e) => {
             setSearchInput(e.currentTarget.value);
@@ -196,9 +196,9 @@ export function MessageSelector(props: {
 
           return (
             <div
-              className={`${styles["message"]} ${
-                props.selection.has(m.id!) && styles["message-selected"]
-              }`}
+              className={clsx(styles["message"], {
+                [styles["message-selected"]]: props.selection.has(m.id!),
+              })}
               key={i}
               onClick={() => {
                 props.updateSelection((selection) => {
@@ -221,7 +221,7 @@ export function MessageSelector(props: {
                 <div className={styles["date"]}>
                   {new Date(m.date).toLocaleString()}
                 </div>
-                <div className={`${styles["content"]} one-line`}>
+                <div className={clsx(styles["content"], "one-line")}>
                   {getMessageTextContent(m)}
                 </div>
               </div>
diff --git a/app/components/model-config.tsx b/app/components/model-config.tsx
index f2297e10b49..e845bfeac7a 100644
--- a/app/components/model-config.tsx
+++ b/app/components/model-config.tsx
@@ -7,6 +7,7 @@ import { ListItem, Select } from "./ui-lib";
 import { useAllModels } from "../utils/hooks";
 import { groupBy } from "lodash-es";
 import styles from "./model-config.module.scss";
+import { getModelProvider } from "../utils/model";
 
 export function ModelConfigList(props: {
   modelConfig: ModelConfig;
@@ -28,7 +29,9 @@ export function ModelConfigList(props: {
           value={value}
           align="left"
           onChange={(e) => {
-            const [model, providerName] = e.currentTarget.value.split("@");
+            const [model, providerName] = getModelProvider(
+              e.currentTarget.value,
+            );
             props.updateConfig((config) => {
               config.model = ModalConfigValidator.model(model);
               config.providerName = providerName as ServiceProvider;
@@ -247,7 +250,9 @@ export function ModelConfigList(props: {
           aria-label={Locale.Settings.CompressModel.Title}
           value={compressModelValue}
           onChange={(e) => {
-            const [model, providerName] = e.currentTarget.value.split("@");
+            const [model, providerName] = getModelProvider(
+              e.currentTarget.value,
+            );
             props.updateConfig((config) => {
               config.compressModel = ModalConfigValidator.model(model);
               config.compressProviderName = providerName as ServiceProvider;
diff --git a/app/components/new-chat.tsx b/app/components/new-chat.tsx
index 54c646f237c..2f4e54c402e 100644
--- a/app/components/new-chat.tsx
+++ b/app/components/new-chat.tsx
@@ -16,6 +16,7 @@ import { MaskAvatar } from "./mask";
 import { useCommand } from "../command";
 import { showConfirm } from "./ui-lib";
 import { BUILTIN_MASK_STORE } from "../masks";
+import clsx from "clsx";
 
 function MaskItem(props: { mask: Mask; onClick?: () => void }) {
   return (
@@ -24,7 +25,9 @@ function MaskItem(props: { mask: Mask; onClick?: () => void }) {
         avatar={props.mask.avatar}
         model={props.mask.modelConfig.model}
       />
-      <div className={styles["mask-name"] + " one-line"}>{props.mask.name}</div>
+      <div className={clsx(styles["mask-name"], "one-line")}>
+        {props.mask.name}
+      </div>
     </div>
   );
 }
diff --git a/app/components/plugin.tsx b/app/components/plugin.tsx
index 29279742a8b..d5bcc66b86d 100644
--- a/app/components/plugin.tsx
+++ b/app/components/plugin.tsx
@@ -28,6 +28,7 @@ import {
 import Locale from "../locales";
 import { useNavigate } from "react-router-dom";
 import { useState } from "react";
+import clsx from "clsx";
 
 export function PluginPage() {
   const navigate = useNavigate();
@@ -199,7 +200,7 @@ export function PluginPage() {
                     <div className={styles["mask-name"]}>
                       {m.title}@<small>{m.version}</small>
                     </div>
-                    <div className={styles["mask-info"] + " one-line"}>
+                    <div className={clsx(styles["mask-info"], "one-line")}>
                       {Locale.Plugin.Item.Info(
                         FunctionToolService.add(m).length,
                       )}
@@ -335,7 +336,10 @@ export function PluginPage() {
               <ListItem
                 subTitle={
                   <div
-                    className={`markdown-body ${pluginStyles["plugin-content"]}`}
+                    className={clsx(
+                      "markdown-body",
+                      pluginStyles["plugin-content"],
+                    )}
                     dir="auto"
                   >
                     <pre>
diff --git a/app/components/realtime-chat/index.ts b/app/components/realtime-chat/index.ts
new file mode 100644
index 00000000000..fdf090f4195
--- /dev/null
+++ b/app/components/realtime-chat/index.ts
@@ -0,0 +1 @@
+export * from "./realtime-chat";
diff --git a/app/components/realtime-chat/realtime-chat.module.scss b/app/components/realtime-chat/realtime-chat.module.scss
new file mode 100644
index 00000000000..ef58bebb655
--- /dev/null
+++ b/app/components/realtime-chat/realtime-chat.module.scss
@@ -0,0 +1,74 @@
+.realtime-chat { // root of the realtime chat panel: full-size flex column with centered content
+  width: 100%;
+  justify-content: center;
+  align-items: center;
+  position: relative; // presumably the anchor for the absolutely positioned .bottom-icons — confirm against the markup
+  display: flex;
+  flex-direction: column;
+  height: 100%;
+  padding: 20px;
+  box-sizing: border-box;
+  .circle-mic { // 150px circle with a pale blue gradient; centers whatever it contains
+    width: 150px;
+    height: 150px;
+    border-radius: 50%;
+    background: linear-gradient(to bottom right, #a0d8ef, #f0f8ff);
+    display: flex;
+    justify-content: center;
+    align-items: center;
+  }
+  .icon-center {
+    font-size: 24px;
+  }
+
+  .bottom-icons { // full-width row placed 20px above the bottom edge; children pushed to opposite ends
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    width: 100%;
+    position: absolute;
+    bottom: 20px;
+    box-sizing: border-box;
+    padding: 0 20px;
+  }
+
+  .icon-left,
+  .icon-right { // shared look for both corner buttons: 46px circles on the --second background
+    width: 46px;
+    height: 46px;
+    font-size: 36px;
+    background: var(--second);
+    border-radius: 50%;
+    padding: 2px;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    cursor: pointer;
+    &:hover { // hover feedback: slightly dimmed
+      opacity: 0.8;
+    }
+  }
+
+  &.mobile { // hidden (display: none) when the element also carries the .mobile class
+    display: none;
+  }
+}
+
+.pulse { // applies the pulse keyframes below: one cycle every 1.5s, repeated indefinitely
+  animation: pulse 1.5s infinite;
+}
+
+@keyframes pulse { // grows to 1.1x and full opacity at the midpoint, then returns to 1x at 0.7 opacity
+  0% {
+    transform: scale(1);
+    opacity: 0.7;
+  }
+  50% {
+    transform: scale(1.1);
+    opacity: 1;
+  }
+  100% { // same values as 0%, so the loop restarts without a jump
+    transform: scale(1);
+    opacity: 0.7;
+  }
+}
diff --git a/app/components/realtime-chat/realtime-chat.tsx b/app/components/realtime-chat/realtime-chat.tsx
new file mode 100644
index 00000000000..faa36373a2c
--- /dev/null
+++ b/app/components/realtime-chat/realtime-chat.tsx
@@ -0,0 +1,433 @@
+import VoiceIcon from "@/app/icons/voice.svg";
+import VoiceOffIcon from "@/app/icons/voice-off.svg";
+import PowerIcon from "@/app/icons/power.svg";
+
+import styles from "./realtime-chat.module.scss";
+import clsx from "clsx";
+
+import { useState, useRef, useEffect } from "react";
+
+import { useChatStore, createMessage, useAppConfig } from "@/app/store";
+
+import { IconButton } from "@/app/components/button";
+
+import {
+  Modality,
+  RTClient,
+  RTInputAudioItem,
+  RTResponse,
+  TurnDetection,
+} from "rt-client";
+import { AudioHandler } from "@/app/lib/audio";
+import { uploadImage } from "@/app/utils/chat";
+import { VoicePrint } from "@/app/components/voice-print";
+
+interface RealtimeChatProps {
+  onClose?: () => void;
+  onStartVoice?: () => void;
+  onPausedVoice?: () => void;
+}
+
+/**
+ * Realtime voice-chat panel.
+ *
+ * On mount it creates an AudioHandler, connects an RTClient using the
+ * realtime settings from the app config, and starts recording. Transcribed
+ * user audio and assistant replies are appended to the current chat
+ * session; recorded and played audio is uploaded and attached to the
+ * corresponding message as `audio_url`.
+ *
+ * NOTE(review): `onStartVoice` and `onPausedVoice` are accepted but never
+ * invoked by this component.
+ */
+export function RealtimeChat({
+  onClose,
+  onStartVoice,
+  onPausedVoice,
+}: RealtimeChatProps) {
+  const chatStore = useChatStore();
+  const session = chatStore.currentSession();
+  const config = useAppConfig();
+  const [status, setStatus] = useState("");
+  const [isRecording, setIsRecording] = useState(false);
+  const [isConnected, setIsConnected] = useState(false);
+  const [isConnecting, setIsConnecting] = useState(false);
+  const [modality, setModality] = useState("audio");
+  const [useVAD, setUseVAD] = useState(true);
+  const [frequencies, setFrequencies] = useState<Uint8Array | undefined>();
+
+  const clientRef = useRef<RTClient | null>(null);
+  const audioHandlerRef = useRef<AudioHandler | null>(null);
+  const initRef = useRef(false);
+  // Live mirror of `isRecording`. The mount effect and its cleanup close over
+  // the first render's state, so they must read the flag through a ref.
+  const isRecordingRef = useRef(false);
+
+  const temperature = config.realtimeConfig.temperature;
+  const apiKey = config.realtimeConfig.apiKey;
+  const model = config.realtimeConfig.model;
+  const azure = config.realtimeConfig.provider === "Azure";
+  const azureEndpoint = config.realtimeConfig.azure.endpoint;
+  const azureDeployment = config.realtimeConfig.azure.deployment;
+  const voice = config.realtimeConfig.voice;
+
+  /** Updates the recording flag in both the ref and the rendered state. */
+  const markRecording = (recording: boolean) => {
+    isRecordingRef.current = recording;
+    setIsRecording(recording);
+  };
+
+  /**
+   * Connects and configures a new client when disconnected; disconnects
+   * when already connected. Does nothing while a connect is in progress.
+   */
+  const handleConnect = async () => {
+    if (isConnecting) return;
+    if (!isConnected) {
+      try {
+        setIsConnecting(true);
+        clientRef.current = azure
+          ? new RTClient(
+              new URL(azureEndpoint),
+              { key: apiKey },
+              { deployment: azureDeployment },
+            )
+          : new RTClient({ key: apiKey }, { model });
+        const modalities: Modality[] =
+          modality === "audio" ? ["text", "audio"] : ["text"];
+        const turnDetection: TurnDetection = useVAD
+          ? { type: "server_vad" }
+          : null;
+        await clientRef.current.configure({
+          instructions: "",
+          voice,
+          input_audio_transcription: { model: "whisper-1" },
+          turn_detection: turnDetection,
+          tools: [],
+          temperature,
+          modalities,
+        });
+        // Runs for the lifetime of the connection; errors are handled inside.
+        void startResponseListener();
+
+        setIsConnected(true);
+        // TODO
+        // try {
+        //   const recentMessages = chatStore.getMessagesWithMemory();
+        //   for (const message of recentMessages) {
+        //     const { role, content } = message;
+        //     if (typeof content === "string") {
+        //       await clientRef.current.sendItem({
+        //         type: "message",
+        //         role: role as any,
+        //         content: [
+        //           {
+        //             type: (role === "assistant" ? "text" : "input_text") as any,
+        //             text: content as string,
+        //           },
+        //         ],
+        //       });
+        //     }
+        //   }
+        //   // await clientRef.current.generateResponse();
+        // } catch (error) {
+        //   console.error("Set message failed:", error);
+        // }
+      } catch (error) {
+        console.error("Connection failed:", error);
+        setStatus("Connection failed");
+        // Tear down the half-initialised client so that recording cannot be
+        // started against a connection that was never configured.
+        await disconnect();
+      } finally {
+        setIsConnecting(false);
+      }
+    } else {
+      await disconnect();
+    }
+  };
+
+  /**
+   * Closes the current client, if any. The ref and the connected flag are
+   * cleared even when `close()` fails, so the UI never stays "connected" to
+   * a client that could not be shut down cleanly.
+   */
+  const disconnect = async () => {
+    const client = clientRef.current;
+    if (!client) return;
+    try {
+      await client.close();
+    } catch (error) {
+      console.error("Disconnect failed:", error);
+    } finally {
+      if (clientRef.current === client) {
+        clientRef.current = null;
+      }
+      setIsConnected(false);
+    }
+  };
+
+  /** Dispatches server events until the event stream ends or fails. */
+  const startResponseListener = async () => {
+    if (!clientRef.current) return;
+
+    try {
+      for await (const serverEvent of clientRef.current.events()) {
+        if (serverEvent.type === "response") {
+          await handleResponse(serverEvent);
+        } else if (serverEvent.type === "input_audio") {
+          await handleInputAudio(serverEvent);
+        }
+      }
+    } catch (error) {
+      // Only report the failure while a client is still registered.
+      if (clientRef.current) {
+        console.error("Response iteration error:", error);
+      }
+    }
+  };
+
+  /**
+   * Streams one server response into the chat session: adds an empty
+   * assistant message, fills it from text / transcript chunks, plays audio
+   * chunks as they arrive, and finally uploads the played audio.
+   */
+  const handleResponse = async (response: RTResponse) => {
+    for await (const item of response) {
+      if (item.type === "message" && item.role === "assistant") {
+        const botMessage = createMessage({
+          role: item.role,
+          content: "",
+        });
+        // add bot message first
+        chatStore.updateTargetSession(session, (session) => {
+          session.messages = session.messages.concat([botMessage]);
+        });
+        let hasAudio = false;
+        for await (const content of item) {
+          if (content.type === "text") {
+            for await (const text of content.textChunks()) {
+              botMessage.content += text;
+            }
+          } else if (content.type === "audio") {
+            const textTask = async () => {
+              for await (const text of content.transcriptChunks()) {
+                botMessage.content += text;
+              }
+            };
+            const audioTask = async () => {
+              audioHandlerRef.current?.startStreamingPlayback();
+              for await (const audio of content.audioChunks()) {
+                hasAudio = true;
+                audioHandlerRef.current?.playChunk(audio);
+              }
+            };
+            await Promise.all([textTask(), audioTask()]);
+          }
+          // update message.content
+          chatStore.updateTargetSession(session, (session) => {
+            session.messages = session.messages.concat();
+          });
+        }
+        if (hasAudio) {
+          // upload audio get audio_url
+          const blob = audioHandlerRef.current?.savePlayFile();
+          if (blob) {
+            uploadImage(blob)
+              .then((audio_url) => {
+                botMessage.audio_url = audio_url;
+                // update text and audio_url
+                chatStore.updateTargetSession(session, (session) => {
+                  session.messages = session.messages.concat();
+                });
+              })
+              .catch((error) => {
+                console.error("Upload audio failed:", error);
+              });
+          }
+        }
+      }
+    }
+  };
+
+  /**
+   * Waits for an input-audio item to complete, appends its transcription as
+   * a user message, uploads the matching slice of the recording, and stops
+   * any streaming playback.
+   */
+  const handleInputAudio = async (item: RTInputAudioItem) => {
+    await item.waitForCompletion();
+    if (item.transcription) {
+      const userMessage = createMessage({
+        role: "user",
+        content: item.transcription,
+      });
+      chatStore.updateTargetSession(session, (session) => {
+        session.messages = session.messages.concat([userMessage]);
+      });
+      // save input audio_url, and update session
+      const { audioStartMillis, audioEndMillis } = item;
+      // upload audio get audio_url
+      const blob = audioHandlerRef.current?.saveRecordFile(
+        audioStartMillis,
+        audioEndMillis,
+      );
+      if (blob) {
+        uploadImage(blob)
+          .then((audio_url) => {
+            userMessage.audio_url = audio_url;
+            chatStore.updateTargetSession(session, (session) => {
+              session.messages = session.messages.concat();
+            });
+          })
+          .catch((error) => {
+            console.error("Upload audio failed:", error);
+          });
+      }
+    }
+    // stop streaming play after get input audio.
+    audioHandlerRef.current?.stopStreamingPlayback();
+  };
+
+  /**
+   * Starts recording when idle (requires a connected client) and stops it
+   * when recording. Without server VAD, stopping also commits the buffered
+   * audio and requests a response.
+   */
+  const toggleRecording = async () => {
+    if (!isRecordingRef.current) {
+      // Nothing to stream to without a client.
+      if (!clientRef.current) return;
+      try {
+        if (!audioHandlerRef.current) {
+          audioHandlerRef.current = new AudioHandler();
+          await audioHandlerRef.current.initialize();
+        }
+        await audioHandlerRef.current.startRecording(async (chunk) => {
+          await clientRef.current?.sendAudio(chunk);
+        });
+        markRecording(true);
+      } catch (error) {
+        console.error("Failed to start recording:", error);
+      }
+    } else if (audioHandlerRef.current) {
+      try {
+        audioHandlerRef.current.stopRecording();
+        // The recorder is stopped from here on, so reflect that even if the
+        // commit below fails.
+        markRecording(false);
+        if (!useVAD) {
+          const inputAudio = await clientRef.current?.commitAudio();
+          if (inputAudio) {
+            await handleInputAudio(inputAudio);
+            await clientRef.current?.generateResponse();
+          }
+        }
+      } catch (error) {
+        console.error("Failed to stop recording:", error);
+      }
+    }
+  };
+
+  useEffect(() => {
+    // Guard against repeated initialisation.
+    if (initRef.current) return;
+    initRef.current = true;
+
+    const initAudioHandler = async () => {
+      const handler = new AudioHandler();
+      await handler.initialize();
+      audioHandlerRef.current = handler;
+      await handleConnect();
+      await toggleRecording();
+    };
+
+    initAudioHandler().catch((error) => {
+      // `status` is rendered as text, so it must be a string, not an Error.
+      setStatus(error instanceof Error ? error.message : String(error));
+      console.error(error);
+    });
+
+    return () => {
+      if (isRecordingRef.current) {
+        isRecordingRef.current = false;
+        audioHandlerRef.current?.stopRecording();
+      }
+      audioHandlerRef.current?.close().catch(console.error);
+      disconnect();
+    };
+  }, []);
+
+  useEffect(() => {
+    let animationFrameId: number;
+
+    if (isConnected && isRecording) {
+      const animationFrame = () => {
+        if (audioHandlerRef.current) {
+          const freqData = audioHandlerRef.current.getByteFrequencyData();
+          setFrequencies(freqData);
+        }
+        animationFrameId = requestAnimationFrame(animationFrame);
+      };
+
+      animationFrameId = requestAnimationFrame(animationFrame);
+    } else {
+      setFrequencies(undefined);
+    }
+
+    return () => {
+      if (animationFrameId) {
+        cancelAnimationFrame(animationFrameId);
+      }
+    };
+  }, [isConnected, isRecording]);
+
+  // update session params
+  useEffect(() => {
+    clientRef.current?.configure({ voice });
+  }, [voice]);
+  useEffect(() => {
+    clientRef.current?.configure({ temperature });
+  }, [temperature]);
+
+  const handleClose = async () => {
+    onClose?.();
+    if (isRecordingRef.current) {
+      await toggleRecording();
+    }
+    disconnect().catch(console.error);
+  };
+
+  return (
+    <div className={styles["realtime-chat"]}>
+      <div
+        className={clsx(styles["circle-mic"], {
+          [styles["pulse"]]: isRecording,
+        })}
+      >
+        <VoicePrint frequencies={frequencies} isActive={isRecording} />
+      </div>
+
+      <div className={styles["bottom-icons"]}>
+        <div>
+          <IconButton
+            icon={isRecording ? <VoiceIcon /> : <VoiceOffIcon />}
+            onClick={toggleRecording}
+            disabled={!isConnected}
+            shadow
+            bordered
+          />
+        </div>
+        <div className={styles["icon-center"]}>{status}</div>
+        <div>
+          <IconButton
+            icon={<PowerIcon />}
+            onClick={handleClose}
+            shadow
+            bordered
+          />
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/app/components/realtime-chat/realtime-config.tsx b/app/components/realtime-chat/realtime-config.tsx
new file mode 100644
index 00000000000..08809afda2f
--- /dev/null
+++ b/app/components/realtime-chat/realtime-config.tsx
@@ -0,0 +1,189 @@
+import { RealtimeConfig } from "@/app/store";
+
+import Locale from "@/app/locales";
+import { ListItem, Select, PasswordInput } from "@/app/components/ui-lib";
+
+import { InputRange } from "@/app/components/input-range";
+import { Voice } from "rt-client";
+import { ServiceProvider } from "@/app/constant";
+
+// Options offered by the provider, model and voice drop-downs below.
+const providers = [ServiceProvider.OpenAI, ServiceProvider.Azure];
+
+const models = ["gpt-4o-realtime-preview-2024-10-01"];
+
+const voice = ["alloy", "shimmer", "echo"];
+
+/**
+ * Settings rows for the realtime-chat feature.
+ *
+ * The enable checkbox is always rendered. The provider, model, API key,
+ * voice and temperature rows appear only while the feature is enabled, and
+ * the Azure endpoint / deployment rows only when the provider is Azure.
+ *
+ * `updateConfig` receives an updater that edits the config object it is
+ * given. Nested `azure` settings are replaced with a new object instead of
+ * being mutated in place, so a caller that hands the updater a shallow copy
+ * does not get its original `azure` object modified behind its back.
+ */
+export function RealtimeConfigList(props: {
+  realtimeConfig: RealtimeConfig;
+  updateConfig: (updater: (config: RealtimeConfig) => void) => void;
+}) {
+  const azureConfigComponent = props.realtimeConfig.provider ===
+    ServiceProvider.Azure && (
+    <>
+      <ListItem
+        title={Locale.Settings.Realtime.Azure.Endpoint.Title}
+        subTitle={Locale.Settings.Realtime.Azure.Endpoint.SubTitle}
+      >
+        <input
+          value={props.realtimeConfig?.azure?.endpoint}
+          type="text"
+          placeholder={Locale.Settings.Realtime.Azure.Endpoint.Title}
+          onChange={(e) => {
+            const endpoint = e.currentTarget.value;
+            props.updateConfig((config) => {
+              config.azure = { ...config.azure, endpoint };
+            });
+          }}
+        />
+      </ListItem>
+      <ListItem
+        title={Locale.Settings.Realtime.Azure.Deployment.Title}
+        subTitle={Locale.Settings.Realtime.Azure.Deployment.SubTitle}
+      >
+        <input
+          value={props.realtimeConfig?.azure?.deployment}
+          type="text"
+          placeholder={Locale.Settings.Realtime.Azure.Deployment.Title}
+          onChange={(e) => {
+            const deployment = e.currentTarget.value;
+            props.updateConfig((config) => {
+              config.azure = { ...config.azure, deployment };
+            });
+          }}
+        />
+      </ListItem>
+    </>
+  );
+
+  return (
+    <>
+      <ListItem
+        title={Locale.Settings.Realtime.Enable.Title}
+        subTitle={Locale.Settings.Realtime.Enable.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={props.realtimeConfig.enable}
+          onChange={(e) =>
+            props.updateConfig(
+              (config) => (config.enable = e.currentTarget.checked),
+            )
+          }
+        ></input>
+      </ListItem>
+
+      {props.realtimeConfig.enable && (
+        <>
+          <ListItem
+            title={Locale.Settings.Realtime.Provider.Title}
+            subTitle={Locale.Settings.Realtime.Provider.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.Realtime.Provider.Title}
+              value={props.realtimeConfig.provider}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.provider = e.target.value as ServiceProvider),
+                );
+              }}
+            >
+              {providers.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.Model.Title}
+            subTitle={Locale.Settings.Realtime.Model.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.Realtime.Model.Title}
+              value={props.realtimeConfig.model}
+              onChange={(e) => {
+                props.updateConfig((config) => (config.model = e.target.value));
+              }}
+            >
+              {models.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.ApiKey.Title}
+            subTitle={Locale.Settings.Realtime.ApiKey.SubTitle}
+          >
+            <PasswordInput
+              aria={Locale.Settings.ShowPassword}
+              aria-label={Locale.Settings.Realtime.ApiKey.Title}
+              value={props.realtimeConfig.apiKey}
+              type="text"
+              placeholder={Locale.Settings.Realtime.ApiKey.Placeholder}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) => (config.apiKey = e.currentTarget.value),
+                );
+              }}
+            />
+          </ListItem>
+          {azureConfigComponent}
+          <ListItem
+            title={Locale.Settings.TTS.Voice.Title}
+            subTitle={Locale.Settings.TTS.Voice.SubTitle}
+          >
+            <Select
+              aria-label={Locale.Settings.TTS.Voice.Title}
+              value={props.realtimeConfig.voice}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) => (config.voice = e.currentTarget.value as Voice),
+                );
+              }}
+            >
+              {voice.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.Realtime.Temperature.Title}
+            subTitle={Locale.Settings.Realtime.Temperature.SubTitle}
+          >
+            <InputRange
+              aria={Locale.Settings.Temperature.Title}
+              value={props.realtimeConfig?.temperature?.toFixed(1)}
+              min="0.6"
+              max="1"
+              step="0.1"
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.temperature = e.currentTarget.valueAsNumber),
+                );
+              }}
+            ></InputRange>
+          </ListItem>
+        </>
+      )}
+    </>
+  );
+}
diff --git a/app/components/sd/sd-panel.tsx b/app/components/sd/sd-panel.tsx
index a71e560ddef..15aff0ab608 100644
--- a/app/components/sd/sd-panel.tsx
+++ b/app/components/sd/sd-panel.tsx
@@ -4,6 +4,7 @@ import { Select } from "@/app/components/ui-lib";
 import { IconButton } from "@/app/components/button";
 import Locale from "@/app/locales";
 import { useSdStore } from "@/app/store/sd";
+import clsx from "clsx";
 
 export const params = [
   {
@@ -136,7 +137,7 @@ export function ControlParamItem(props: {
   className?: string;
 }) {
   return (
-    <div className={styles["ctrl-param-item"] + ` ${props.className || ""}`}>
+    <div className={clsx(styles["ctrl-param-item"], props.className)}>
       <div className={styles["ctrl-param-item-header"]}>
         <div className={styles["ctrl-param-item-title"]}>
           <div>
diff --git a/app/components/sd/sd.tsx b/app/components/sd/sd.tsx
index 0ace62a83cb..1ccc0647e4c 100644
--- a/app/components/sd/sd.tsx
+++ b/app/components/sd/sd.tsx
@@ -36,6 +36,7 @@ import { removeImage } from "@/app/utils/chat";
 import { SideBar } from "./sd-sidebar";
 import { WindowContent } from "@/app/components/home";
 import { params } from "./sd-panel";
+import clsx from "clsx";
 
 function getSdTaskStatus(item: any) {
   let s: string;
@@ -104,7 +105,7 @@ export function Sd() {
 
   return (
     <>
-      <SideBar className={isSd ? homeStyles["sidebar-show"] : ""} />
+      <SideBar className={clsx({ [homeStyles["sidebar-show"]]: isSd })} />
       <WindowContent>
         <div className={chatStyles.chat} key={"1"}>
           <div className="window-header" data-tauri-drag-region>
@@ -121,7 +122,10 @@ export function Sd() {
               </div>
             )}
             <div
-              className={`window-header-title ${chatStyles["chat-body-title"]}`}
+              className={clsx(
+                "window-header-title",
+                chatStyles["chat-body-title"],
+              )}
             >
               <div className={`window-header-main-title`}>Stability AI</div>
               <div className="window-header-sub-title">
diff --git a/app/components/settings.tsx b/app/components/settings.tsx
index 72a39174669..470fe77f87a 100644
--- a/app/components/settings.tsx
+++ b/app/components/settings.tsx
@@ -85,6 +85,7 @@ import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
 import { TTSConfigList } from "./tts-config";
+import { RealtimeConfigList } from "./realtime-chat/realtime-config";
 
 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();
@@ -1785,9 +1786,11 @@ export function Settings() {
           <ListItem
             title={Locale.Settings.Access.CustomModel.Title}
             subTitle={Locale.Settings.Access.CustomModel.SubTitle}
+            vertical={true}
           >
             <input
               aria-label={Locale.Settings.Access.CustomModel.Title}
+              style={{ width: "100%", maxWidth: "unset", textAlign: "left" }}
               type="text"
               value={config.customModels}
               placeholder="model1,model2,model3"
@@ -1814,7 +1817,18 @@ export function Settings() {
         {shouldShowPromptModal && (
           <UserPromptModal onClose={() => setShowPromptModal(false)} />
         )}
-
+        <List>
+          <RealtimeConfigList
+            realtimeConfig={config.realtimeConfig}
+            updateConfig={(updater) => {
+              const realtimeConfig = { ...config.realtimeConfig };
+              updater(realtimeConfig);
+              config.update(
+                (config) => (config.realtimeConfig = realtimeConfig),
+              );
+            }}
+          />
+        </List>
         <List>
           <TTSConfigList
             ttsConfig={config.ttsConfig}
diff --git a/app/components/sidebar.tsx b/app/components/sidebar.tsx
index 2a5c308b78c..a5e33b15ea3 100644
--- a/app/components/sidebar.tsx
+++ b/app/components/sidebar.tsx
@@ -30,6 +30,7 @@ import { Link, useNavigate } from "react-router-dom";
 import { isIOS, useMobileScreen } from "../utils";
 import dynamic from "next/dynamic";
 import { showConfirm, Selector } from "./ui-lib";
+import clsx from "clsx";
 
 const ChatList = dynamic(async () => (await import("./chat-list")).ChatList, {
   loading: () => null,
@@ -141,9 +142,9 @@ export function SideBarContainer(props: {
   const { children, className, onDragStart, shouldNarrow } = props;
   return (
     <div
-      className={`${styles.sidebar} ${className} ${
-        shouldNarrow && styles["narrow-sidebar"]
-      }`}
+      className={clsx(styles.sidebar, className, {
+        [styles["narrow-sidebar"]]: shouldNarrow,
+      })}
       style={{
         // #3016 disable transition on ios mobile screen
         transition: isMobileScreen && isIOSMobile ? "none" : undefined,
@@ -171,9 +172,9 @@ export function SideBarHeader(props: {
   return (
     <Fragment>
       <div
-        className={`${styles["sidebar-header"]} ${
-          shouldNarrow ? styles["sidebar-header-narrow"] : ""
-        }`}
+        className={clsx(styles["sidebar-header"], {
+          [styles["sidebar-header-narrow"]]: shouldNarrow,
+        })}
         data-tauri-drag-region
       >
         <div className={styles["sidebar-title-container"]}>
@@ -182,7 +183,7 @@ export function SideBarHeader(props: {
           </div>
           <div className={styles["sidebar-sub-title"]}>{subTitle}</div>
         </div>
-        <div className={styles["sidebar-logo"] + " no-dark"}>{logo}</div>
+        <div className={clsx(styles["sidebar-logo"], "no-dark")}>{logo}</div>
       </div>
       {children}
     </Fragment>
@@ -286,7 +287,7 @@ export function SideBar(props: { className?: string }) {
       <SideBarTail
         primaryAction={
           <>
-            <div className={styles["sidebar-action"] + " " + styles.mobile}>
+            <div className={clsx(styles["sidebar-action"], styles.mobile)}>
               <IconButton
                 icon={<DeleteIcon />}
                 onClick={async () => {
diff --git a/app/components/ui-lib.tsx b/app/components/ui-lib.tsx
index 4af37dbba1c..a642652358f 100644
--- a/app/components/ui-lib.tsx
+++ b/app/components/ui-lib.tsx
@@ -23,6 +23,7 @@ import React, {
   useRef,
 } from "react";
 import { IconButton } from "./button";
+import clsx from "clsx";
 
 export function Popover(props: {
   children: JSX.Element;
@@ -45,7 +46,7 @@ export function Popover(props: {
 
 export function Card(props: { children: JSX.Element[]; className?: string }) {
   return (
-    <div className={styles.card + " " + props.className}>{props.children}</div>
+    <div className={clsx(styles.card, props.className)}>{props.children}</div>
   );
 }
 
@@ -60,11 +61,13 @@ export function ListItem(props: {
 }) {
   return (
     <div
-      className={
-        styles["list-item"] +
-        ` ${props.vertical ? styles["vertical"] : ""} ` +
-        ` ${props.className || ""}`
-      }
+      className={clsx(
+        styles["list-item"],
+        {
+          [styles["vertical"]]: props.vertical,
+        },
+        props.className,
+      )}
       onClick={props.onClick}
     >
       <div className={styles["list-header"]}>
@@ -135,9 +138,9 @@ export function Modal(props: ModalProps) {
 
   return (
     <div
-      className={
-        styles["modal-container"] + ` ${isMax && styles["modal-container-max"]}`
-      }
+      className={clsx(styles["modal-container"], {
+        [styles["modal-container-max"]]: isMax,
+      })}
     >
       <div className={styles["modal-header"]}>
         <div className={styles["modal-title"]}>{props.title}</div>
@@ -260,7 +263,7 @@ export function Input(props: InputProps) {
   return (
     <textarea
       {...props}
-      className={`${styles["input"]} ${props.className}`}
+      className={clsx(styles["input"], props.className)}
     ></textarea>
   );
 }
@@ -301,9 +304,13 @@ export function Select(
   const { className, children, align, ...otherProps } = props;
   return (
     <div
-      className={`${styles["select-with-icon"]} ${
-        align === "left" ? styles["left-align-option"] : ""
-      } ${className}`}
+      className={clsx(
+        styles["select-with-icon"],
+        {
+          [styles["left-align-option"]]: align === "left",
+        },
+        className,
+      )}
     >
       <select className={styles["select-with-icon-select"]} {...otherProps}>
         {children}
@@ -509,9 +516,9 @@ export function Selector<T>(props: {
             const selected = selectedValues.includes(item.value);
             return (
               <ListItem
-                className={`${styles["selector-item"]} ${
-                  item.disable && styles["selector-item-disabled"]
-                }`}
+                className={clsx(styles["selector-item"], {
+                  [styles["selector-item-disabled"]]: item.disable,
+                })}
                 key={i}
                 title={item.title}
                 subTitle={item.subTitle}
diff --git a/app/components/voice-print/index.ts b/app/components/voice-print/index.ts
new file mode 100644
index 00000000000..221a695387d
--- /dev/null
+++ b/app/components/voice-print/index.ts
@@ -0,0 +1 @@
+export * from "./voice-print";
diff --git a/app/components/voice-print/voice-print.module.scss b/app/components/voice-print/voice-print.module.scss
new file mode 100644
index 00000000000..b6e51fff409
--- /dev/null
+++ b/app/components/voice-print/voice-print.module.scss
@@ -0,0 +1,11 @@
+.voice-print { // full-width, 60px-high wrapper for the waveform canvas
+  width: 100%;
+  height: 60px;
+  margin: 20px 0;
+
+  canvas {
+    width: 100%;
+    height: 100%;
+    filter: brightness(1.2); // boost the overall brightness
+  }
+}
diff --git a/app/components/voice-print/voice-print.tsx b/app/components/voice-print/voice-print.tsx
new file mode 100644
index 00000000000..793210c1930
--- /dev/null
+++ b/app/components/voice-print/voice-print.tsx
@@ -0,0 +1,172 @@
+import { useEffect, useRef, useCallback } from "react";
+import styles from "./voice-print.module.scss";
+
+interface VoicePrintProps {
+  /** Latest frequency data; each entry is treated as a 0-255 magnitude. */
+  frequencies?: Uint8Array;
+  /** When falsy, the canvas is cleared and nothing is drawn. */
+  isActive?: boolean;
+}
+
+/**
+ * Renders a mirrored, smoothed waveform ("voice print") of the supplied
+ * frequency data on a canvas.
+ *
+ * All drawing is done in CSS pixels: the backing store is enlarged by
+ * `devicePixelRatio` and the context is scaled by the same factor, so
+ * coordinates must come from the element's layout size rather than from
+ * `canvas.width` / `canvas.height`.
+ */
+export function VoicePrint({ frequencies, isActive }: VoicePrintProps) {
+  // Canvas element used to obtain the 2D drawing context.
+  const canvasRef = useRef<HTMLCanvasElement>(null);
+  // Recent frequency frames, used to smooth the waveform over time.
+  const historyRef = useRef<number[][]>([]);
+  // Number of history frames to keep; larger values give smoother motion.
+  const historyLengthRef = useRef(10);
+  // Pending requestAnimationFrame id, kept so it can be cancelled on cleanup.
+  const animationFrameRef = useRef<number>();
+
+  /**
+   * Appends a frame to the history, dropping the oldest frame once the
+   * configured length is exceeded (FIFO).
+   */
+  const updateHistory = useCallback((freqArray: number[]) => {
+    historyRef.current.push(freqArray);
+    if (historyRef.current.length > historyLengthRef.current) {
+      historyRef.current.shift();
+    }
+  }, []);
+
+  useEffect(() => {
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    // High-DPI handling: size the backing store in device pixels and scale
+    // the context so that one drawing unit equals one CSS pixel. Assigning
+    // `canvas.width` resets the context state, so the scale never stacks.
+    const dpr = window.devicePixelRatio || 1;
+    const cssWidth = canvas.offsetWidth;
+    const cssHeight = canvas.offsetHeight;
+    canvas.width = cssWidth * dpr;
+    canvas.height = cssHeight * dpr;
+    ctx.scale(dpr, dpr);
+
+    /**
+     * Draws one frame and schedules the next via requestAnimationFrame:
+     * 1. clear the canvas
+     * 2. record the current frame in the history
+     * 3. compute the waveform points
+     * 4. draw the curve and its mirror image, then fill
+     */
+    const draw = () => {
+      // Clear in CSS-pixel units (the context is already scaled by dpr).
+      ctx.clearRect(0, 0, cssWidth, cssHeight);
+
+      if (!frequencies || !isActive) {
+        historyRef.current = [];
+        return;
+      }
+
+      const freqArray = Array.from(frequencies);
+      updateHistory(freqArray);
+
+      const points: [number, number][] = [];
+      const centerY = cssHeight / 2;
+      // Guard the single-bin case, which would otherwise divide by zero.
+      const sliceWidth =
+        frequencies.length > 1 ? cssWidth / (frequencies.length - 1) : 0;
+
+      // Start the main waveform path at the left edge of the centre line.
+      ctx.beginPath();
+      ctx.moveTo(0, centerY);
+
+      for (let i = 0; i < frequencies.length; i++) {
+        const x = i * sliceWidth;
+        let avgFrequency = frequencies[i];
+
+        // Smoothing: average the current value with the values recorded
+        // for the same bin in the history frames.
+        if (historyRef.current.length > 0) {
+          const historicalValues = historyRef.current.map((h) => h[i] || 0);
+          avgFrequency =
+            (avgFrequency + historicalValues.reduce((a, b) => a + b, 0)) /
+            (historyRef.current.length + 1);
+        }
+
+        // Normalise to 0..1, convert to an amplitude of at most half the
+        // canvas height, and modulate with a time-dependent sine.
+        const normalized = avgFrequency / 255.0;
+        const height = normalized * (cssHeight / 2);
+        const y = centerY + height * Math.sin(i * 0.2 + Date.now() * 0.002);
+
+        points.push([x, y]);
+
+        if (i === 0) {
+          ctx.moveTo(x, y);
+        } else {
+          // Quadratic curve through the previous point to the midpoint,
+          // which rounds off the corners between samples.
+          const prevPoint = points[i - 1];
+          const midX = (prevPoint[0] + x) / 2;
+          ctx.quadraticCurveTo(
+            prevPoint[0],
+            prevPoint[1],
+            midX,
+            (prevPoint[1] + y) / 2,
+          );
+        }
+      }
+
+      // Walk back from right to left, mirroring each point about centerY.
+      for (let i = points.length - 1; i >= 0; i--) {
+        const [x, y] = points[i];
+        const symmetricY = centerY - (y - centerY);
+        if (i === points.length - 1) {
+          ctx.lineTo(x, symmetricY);
+        } else {
+          const nextPoint = points[i + 1];
+          const midX = (nextPoint[0] + x) / 2;
+          ctx.quadraticCurveTo(
+            nextPoint[0],
+            centerY - (nextPoint[1] - centerY),
+            midX,
+            centerY - ((nextPoint[1] + y) / 2 - centerY),
+          );
+        }
+      }
+
+      ctx.closePath();
+
+      // Left-to-right, semi-transparent blue gradient fill.
+      const gradient = ctx.createLinearGradient(0, 0, cssWidth, 0);
+      gradient.addColorStop(0, "rgba(100, 180, 255, 0.95)");
+      gradient.addColorStop(0.5, "rgba(140, 200, 255, 0.9)");
+      gradient.addColorStop(1, "rgba(180, 220, 255, 0.95)");
+
+      ctx.fillStyle = gradient;
+      ctx.fill();
+
+      animationFrameRef.current = requestAnimationFrame(draw);
+    };
+
+    // Kick off the animation loop.
+    draw();
+
+    // Cancel the pending frame when dependencies change or on unmount.
+    return () => {
+      if (animationFrameRef.current) {
+        cancelAnimationFrame(animationFrameRef.current);
+      }
+    };
+  }, [frequencies, isActive, updateHistory]);
+
+  return (
+    <div className={styles["voice-print"]}>
+      <canvas ref={canvasRef} />
+    </div>
+  );
+}
diff --git a/app/config/server.ts b/app/config/server.ts
index 485f950da03..29d797702a0 100644
--- a/app/config/server.ts
+++ b/app/config/server.ts
@@ -129,14 +129,17 @@ export const getServerSideConfig = () => {
     if (customModels) customModels += ",";
     customModels += DEFAULT_MODELS.filter(
       (m) =>
-        (m.name.startsWith("gpt-4") || m.name.startsWith("chatgpt-4o")) &&
+        (m.name.startsWith("gpt-4") ||
+          m.name.startsWith("chatgpt-4o") ||
+          m.name.startsWith("o1")) &&
         !m.name.startsWith("gpt-4o-mini"),
     )
       .map((m) => "-" + m.name)
       .join(",");
     if (
       (defaultModel.startsWith("gpt-4") ||
-        defaultModel.startsWith("chatgpt-4o")) &&
+        defaultModel.startsWith("chatgpt-4o") ||
+        defaultModel.startsWith("o1")) &&
       !defaultModel.startsWith("gpt-4o-mini")
     )
       defaultModel = "";
diff --git a/app/constant.ts b/app/constant.ts
index 1d60e1ec663..25c8d98eae3 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -232,7 +232,7 @@ export const XAI = {
 
 export const ChatGLM = {
   ExampleEndpoint: CHATGLM_BASE_URL,
-  ChatPath: "/api/paas/v4/chat/completions",
+  ChatPath: "api/paas/v4/chat/completions",
 };
 
 export const DEFAULT_INPUT_TEMPLATE = `{{input}}`; // input / time / model / lang
@@ -264,6 +264,7 @@ export const KnowledgeCutOffDate: Record<string, string> = {
   "gpt-4o": "2023-10",
   "gpt-4o-2024-05-13": "2023-10",
   "gpt-4o-2024-08-06": "2023-10",
+  "gpt-4o-2024-11-20": "2023-10",
   "chatgpt-4o-latest": "2023-10",
   "gpt-4o-mini": "2023-10",
   "gpt-4o-mini-2024-07-18": "2023-10",
@@ -303,6 +304,7 @@ const openaiModels = [
   "gpt-4o",
   "gpt-4o-2024-05-13",
   "gpt-4o-2024-08-06",
+  "gpt-4o-2024-11-20",
   "chatgpt-4o-latest",
   "gpt-4o-mini",
   "gpt-4o-mini-2024-07-18",
@@ -318,6 +320,9 @@ const googleModels = [
   "gemini-1.0-pro",
   "gemini-1.5-pro-latest",
   "gemini-1.5-flash-latest",
+  "gemini-exp-1114",
+  "gemini-exp-1121",
+  "learnlm-1.5-pro-experimental",
   "gemini-pro-vision",
 ];
 
@@ -327,11 +332,13 @@ const anthropicModels = [
   "claude-2.1",
   "claude-3-sonnet-20240229",
   "claude-3-opus-20240229",
+  "claude-3-opus-latest",
   "claude-3-haiku-20240307",
+  "claude-3-5-haiku-20241022",
+  "claude-3-5-haiku-latest",
   "claude-3-5-sonnet-20240620",
   "claude-3-5-sonnet-20241022",
   "claude-3-5-sonnet-latest",
-  "claude-3-opus-latest",
 ];
 
 const baiduModels = [
diff --git a/app/icons/headphone.svg b/app/icons/headphone.svg
new file mode 100644
index 00000000000..287e3add858
--- /dev/null
+++ b/app/icons/headphone.svg
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="16" height="16" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path d="M4 28C4 26.8954 4.89543 26 6 26H10V38H6C4.89543 38 4 37.1046 4 36V28Z" fill="none" />
+    <path d="M38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
+        fill="none" />
+    <path
+        d="M10 36V24C10 16.268 16.268 10 24 10C31.732 10 38 16.268 38 24V36M10 26H6C4.89543 26 4 26.8954 4 28V36C4 37.1046 4.89543 38 6 38H10V26ZM38 26H42C43.1046 26 44 26.8954 44 28V36C44 37.1046 43.1046 38 42 38H38V26Z"
+        stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M16 32H20L22 26L26 38L28 32H32" stroke="#333" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
\ No newline at end of file
diff --git a/app/icons/power.svg b/app/icons/power.svg
new file mode 100644
index 00000000000..f60fc426678
--- /dev/null
+++ b/app/icons/power.svg
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path
+        d="M14.5 8C13.8406 8.37652 13.2062 8.79103 12.6 9.24051C11.5625 10.0097 10.6074 10.8814 9.75 11.8402C6.79377 15.1463 5 19.4891 5 24.2455C5 34.6033 13.5066 43 24 43C34.4934 43 43 34.6033 43 24.2455C43 19.4891 41.2062 15.1463 38.25 11.8402C37.3926 10.8814 36.4375 10.0097 35.4 9.24051C34.7938 8.79103 34.1594 8.37652 33.5 8"
+        stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 4V24" stroke="#333" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+</svg>
\ No newline at end of file
diff --git a/app/icons/voice-off.svg b/app/icons/voice-off.svg
new file mode 100644
index 00000000000..d4aae988a82
--- /dev/null
+++ b/app/icons/voice-off.svg
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <path
+        d="M31 24V11C31 7.13401 27.866 4 24 4C20.134 4 17 7.13401 17 11V24C17 27.866 20.134 31 24 31C27.866 31 31 27.866 31 24Z"
+        stroke="#d0021b" stroke-width="4" stroke-linejoin="round" />
+    <path
+        d="M9 23C9 31.2843 15.7157 38 24 38C25.7532 38 27.4361 37.6992 29 37.1465M39 23C39 25.1333 38.5547 27.1626 37.7519 29"
+        stroke="#d0021b" stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 38V44" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+    <path d="M42 42L6 6" stroke="#d0021b" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
\ No newline at end of file
diff --git a/app/icons/voice.svg b/app/icons/voice.svg
new file mode 100644
index 00000000000..2d85360427f
--- /dev/null
+++ b/app/icons/voice.svg
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="24" height="24" viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
+    <rect x="17" y="4" width="14" height="27" rx="7" fill="none" stroke="#333" stroke-width="4"
+        stroke-linejoin="round" />
+    <path d="M9 23C9 31.2843 15.7157 38 24 38C32.2843 38 39 31.2843 39 23" stroke="#333"
+        stroke-width="4" stroke-linecap="round" stroke-linejoin="round" />
+    <path d="M24 38V44" stroke="#333" stroke-width="4" stroke-linecap="round"
+        stroke-linejoin="round" />
+</svg>
\ No newline at end of file
diff --git a/app/lib/audio.ts b/app/lib/audio.ts
new file mode 100644
index 00000000000..a4937d773ee
--- /dev/null
+++ b/app/lib/audio.ts
@@ -0,0 +1,261 @@
+/**
+ * Microphone capture and streaming playback helper for realtime voice chat.
+ *
+ * Audio is handled as mono 16-bit PCM at `sampleRate` Hz. Captured and
+ * played samples are also kept in memory so they can be exported as WAV
+ * blobs, and both signals are routed into `analyser` for visualisation.
+ */
+export class AudioHandler {
+  private context: AudioContext;
+  private mergeNode: ChannelMergerNode;
+  private analyserData: Uint8Array;
+  public analyser: AnalyserNode;
+  private workletNode: AudioWorkletNode | null = null;
+  private stream: MediaStream | null = null;
+  private source: MediaStreamAudioSourceNode | null = null;
+  // Every int16 sample captured so far, flattened into one list.
+  private recordBuffer: number[] = [];
+  private readonly sampleRate = 24000;
+
+  private nextPlayTime: number = 0;
+  private isPlaying: boolean = false;
+  private playbackQueue: AudioBufferSourceNode[] = [];
+  // Every int16 sample queued for playback since the last stop, flattened.
+  private playBuffer: number[] = [];
+
+  constructor() {
+    this.context = new AudioContext({ sampleRate: this.sampleRate });
+    // Input 0 carries the microphone and input 1 the playback; merging them
+    // lets a single analyser observe both signals.
+    this.mergeNode = new ChannelMergerNode(this.context, { numberOfInputs: 2 });
+    this.analyser = new AnalyserNode(this.context, { fftSize: 256 });
+    this.analyserData = new Uint8Array(this.analyser.frequencyBinCount);
+    this.mergeNode.connect(this.analyser);
+  }
+
+  /**
+   * Refresh the shared frequency buffer from the analyser and return it.
+   * The same array instance is reused on every call.
+   */
+  getByteFrequencyData() {
+    this.analyser.getByteFrequencyData(this.analyserData);
+    return this.analyserData;
+  }
+
+  /** Load the recorder worklet module into the audio context. */
+  async initialize() {
+    await this.context.audioWorklet.addModule("/audio-processor.js");
+  }
+
+  /**
+   * Open the microphone and start streaming captured audio.
+   *
+   * @param onChunk called with each captured chunk as 16-bit PCM bytes
+   * @throws rethrows any failure after logging it
+   */
+  async startRecording(onChunk: (chunk: Uint8Array) => void) {
+    try {
+      if (!this.workletNode) {
+        await this.initialize();
+      }
+
+      this.stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          channelCount: 1,
+          sampleRate: this.sampleRate,
+          echoCancellation: true,
+          noiseSuppression: true,
+        },
+      });
+
+      await this.context.resume();
+      this.source = this.context.createMediaStreamSource(this.stream);
+      this.workletNode = new AudioWorkletNode(
+        this.context,
+        "audio-recorder-processor",
+      );
+
+      this.workletNode.port.onmessage = (event) => {
+        if (event.data.eventType === "audio") {
+          const float32Data = event.data.audioData;
+          const int16Data = new Int16Array(float32Data.length);
+
+          for (let i = 0; i < float32Data.length; i++) {
+            // Clamp to [-1, 1], then scale to the asymmetric int16 range.
+            const s = Math.max(-1, Math.min(1, float32Data[i]));
+            int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
+          }
+
+          const uint8Data = new Uint8Array(int16Data.buffer);
+          onChunk(uint8Data);
+          // Keep a copy for saveRecordFile(). A plain loop avoids spreading
+          // the typed array into push(), which can overflow the call stack.
+          for (let i = 0; i < int16Data.length; i++) {
+            this.recordBuffer.push(int16Data[i]);
+          }
+        }
+      };
+
+      this.source.connect(this.workletNode);
+      this.source.connect(this.mergeNode, 0, 0);
+      this.workletNode.connect(this.context.destination);
+
+      this.workletNode.port.postMessage({ command: "START_RECORDING" });
+    } catch (error) {
+      console.error("Error starting recording:", error);
+      throw error;
+    }
+  }
+
+  /**
+   * Stop capturing: halt the worklet, disconnect the nodes and release the
+   * microphone tracks.
+   *
+   * @throws Error if recording was never started
+   */
+  stopRecording() {
+    if (!this.workletNode || !this.source || !this.stream) {
+      throw new Error("Recording not started");
+    }
+
+    this.workletNode.port.postMessage({ command: "STOP_RECORDING" });
+
+    this.workletNode.disconnect();
+    this.source.disconnect();
+    this.stream.getTracks().forEach((track) => track.stop());
+  }
+
+  /** Enable playChunk() and schedule the first chunk for "now". */
+  startStreamingPlayback() {
+    this.isPlaying = true;
+    this.nextPlayTime = this.context.currentTime;
+  }
+
+  /** Stop every scheduled source and discard the saved playback samples. */
+  stopStreamingPlayback() {
+    this.isPlaying = false;
+    this.playbackQueue.forEach((source) => source.stop());
+    this.playbackQueue = [];
+    this.playBuffer = [];
+  }
+
+  /**
+   * Queue one chunk of 16-bit PCM bytes for back-to-back playback.
+   * Ignored unless startStreamingPlayback() has been called.
+   *
+   * @param chunk raw PCM bytes; a trailing odd byte is dropped
+   */
+  playChunk(chunk: Uint8Array) {
+    if (!this.isPlaying) return;
+
+    // Read through a DataView bounded by the chunk's own byteOffset and
+    // byteLength, so a view into a larger or unaligned buffer decodes right.
+    const sampleCount = Math.floor(chunk.byteLength / 2);
+    if (sampleCount === 0) return; // createBuffer() rejects a zero length
+    const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
+
+    const float32Data = new Float32Array(sampleCount);
+    for (let i = 0; i < sampleCount; i++) {
+      // Decoded as little-endian — NOTE(review): confirm the sender's byte order.
+      const sample = view.getInt16(i * 2, true);
+      this.playBuffer.push(sample); // save playBuffer
+      float32Data[i] = sample / (sample < 0 ? 0x8000 : 0x7fff);
+    }
+
+    const audioBuffer = this.context.createBuffer(
+      1,
+      float32Data.length,
+      this.sampleRate,
+    );
+    audioBuffer.getChannelData(0).set(float32Data);
+
+    const source = this.context.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(this.context.destination);
+    source.connect(this.mergeNode, 0, 1);
+
+    // Never schedule in the past: after an underrun restart from "now", so
+    // this chunk and the following one cannot overlap.
+    const startTime = Math.max(this.nextPlayTime, this.context.currentTime);
+    source.start(startTime);
+    this.nextPlayTime = startTime + audioBuffer.length / this.sampleRate;
+
+    this.playbackQueue.push(source);
+    source.onended = () => {
+      const index = this.playbackQueue.indexOf(source);
+      if (index > -1) {
+        this.playbackQueue.splice(index, 1);
+      }
+    };
+  }
+
+  /**
+   * Wrap mono PCM samples in a 44-byte RIFF/WAVE header.
+   *
+   * @param data 16-bit PCM samples; its whole underlying buffer is written
+   * @param bytesPerSample despite the name, the BITS per sample written to
+   *   the header (16 for Int16Array data)
+   * @returns a Blob holding the header followed by the sample bytes
+   */
+  _saveData(data: Int16Array, bytesPerSample = 16): Blob {
+    const headerLength = 44;
+    const numberOfChannels = 1;
+    // Bytes per sample frame; the byte rate below must be derived from it.
+    const blockAlign = numberOfChannels * (bytesPerSample / 8);
+    const byteLength = data.buffer.byteLength;
+    const header = new Uint8Array(headerLength);
+    const view = new DataView(header.buffer);
+    view.setUint32(0, 1380533830, false); // RIFF identifier 'RIFF'
+    view.setUint32(4, 36 + byteLength, true); // file length minus RIFF identifier length and file description length
+    view.setUint32(8, 1463899717, false); // RIFF type 'WAVE'
+    view.setUint32(12, 1718449184, false); // format chunk identifier 'fmt '
+    view.setUint32(16, 16, true); // format chunk length
+    view.setUint16(20, 1, true); // sample format (raw)
+    view.setUint16(22, numberOfChannels, true); // channel count
+    view.setUint32(24, this.sampleRate, true); // sample rate
+    view.setUint32(28, this.sampleRate * blockAlign, true); // byte rate (sample rate * block align)
+    view.setUint16(32, blockAlign, true); // block align (channel count * bytes per sample)
+    view.setUint16(34, bytesPerSample, true); // bits per sample
+    view.setUint32(36, 1684108385, false); // data chunk identifier 'data'
+    view.setUint32(40, byteLength, true); // data chunk length
+
+    // using data.buffer, so no need to setUint16 to view.
+    // The payload is WAV, so label it as such rather than as MPEG audio.
+    return new Blob([view, data.buffer], { type: "audio/wav" });
+  }
+
+  /** Export everything queued for playback since the last stop as WAV. */
+  savePlayFile() {
+    return this._saveData(new Int16Array(this.playBuffer));
+  }
+
+  /**
+   * Export a slice of the recorded microphone audio as WAV.
+   *
+   * @param audioStartMillis slice start in ms; falsy means from the beginning
+   * @param audioEndMillis slice end in ms; falsy means up to the latest sample
+   */
+  saveRecordFile(
+    audioStartMillis: number | undefined,
+    audioEndMillis: number | undefined,
+  ) {
+    const startIndex = audioStartMillis
+      ? Math.floor((audioStartMillis * this.sampleRate) / 1000)
+      : 0;
+    const endIndex = audioEndMillis
+      ? Math.floor((audioEndMillis * this.sampleRate) / 1000)
+      : this.recordBuffer.length;
+    return this._saveData(
+      new Int16Array(this.recordBuffer.slice(startIndex, endIndex)),
+    );
+  }
+
+  /** Drop recorded samples, release the microphone and close the context. */
+  async close() {
+    this.recordBuffer = [];
+    this.workletNode?.disconnect();
+    this.source?.disconnect();
+    this.stream?.getTracks().forEach((track) => track.stop());
+    await this.context.close();
+  }
+}
diff --git a/app/locales/cn.ts b/app/locales/cn.ts
index dd3fece22f4..f906b97de3b 100644
--- a/app/locales/cn.ts
+++ b/app/locales/cn.ts
@@ -567,6 +567,39 @@ const cn = {
         SubTitle: "生成语音的速度",
       },
     },
+    Realtime: {
+      Enable: {
+        Title: "实时聊天",
+        SubTitle: "开启实时聊天功能",
+      },
+      Provider: {
+        Title: "模型服务商",
+        SubTitle: "切换不同的服务商",
+      },
+      Model: {
+        Title: "模型",
+        SubTitle: "选择一个模型",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "接口地址",
+          SubTitle: "接口地址",
+        },
+        Deployment: {
+          Title: "部署名称",
+          SubTitle: "部署名称",
+        },
+      },
+      Temperature: {
+        Title: "随机性 (temperature)",
+        SubTitle: "值越大,回复越随机",
+      },
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",
diff --git a/app/locales/en.ts b/app/locales/en.ts
index 2f4217ef8e4..9bd0c850107 100644
--- a/app/locales/en.ts
+++ b/app/locales/en.ts
@@ -576,6 +576,39 @@ const en: LocaleType = {
       },
       Engine: "TTS Engine",
     },
+    Realtime: {
+      Enable: {
+        Title: "Realtime Chat",
+        SubTitle: "Enable realtime chat feature",
+      },
+      Provider: {
+        Title: "Model Provider",
+        SubTitle: "Switch between different providers",
+      },
+      Model: {
+        Title: "Model",
+        SubTitle: "Select a model",
+      },
+      ApiKey: {
+        Title: "API Key",
+        SubTitle: "API Key",
+        Placeholder: "API Key",
+      },
+      Azure: {
+        Endpoint: {
+          Title: "Endpoint",
+          SubTitle: "Endpoint",
+        },
+        Deployment: {
+          Title: "Deployment Name",
+          SubTitle: "Deployment Name",
+        },
+      },
+      Temperature: {
+        Title: "Randomness (temperature)",
+        SubTitle: "Higher values result in more random responses",
+      },
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",
diff --git a/app/store/access.ts b/app/store/access.ts
index 97fc659f1ae..a4da06df16e 100644
--- a/app/store/access.ts
+++ b/app/store/access.ts
@@ -21,6 +21,7 @@ import { getClientConfig } from "../config/client";
 import { createPersistStore } from "../utils/store";
 import { ensure } from "../utils/clone";
 import { DEFAULT_CONFIG } from "./config";
+import { getModelProvider } from "../utils/model";
 
 let fetchState = 0; // 0 not fetch, 1 fetching, 2 done
 
@@ -226,9 +227,9 @@ export const useAccessStore = createPersistStore(
         .then((res) => {
           const defaultModel = res.defaultModel ?? "";
           if (defaultModel !== "") {
-            const [model, providerName] = defaultModel.split("@");
+            const [model, providerName] = getModelProvider(defaultModel);
             DEFAULT_CONFIG.modelConfig.model = model;
-            DEFAULT_CONFIG.modelConfig.providerName = providerName;
+            DEFAULT_CONFIG.modelConfig.providerName = providerName as any;
           }
 
           return res;
diff --git a/app/store/chat.ts b/app/store/chat.ts
index 067c8dadca4..b14f0039817 100644
--- a/app/store/chat.ts
+++ b/app/store/chat.ts
@@ -57,6 +57,7 @@ export type ChatMessage = RequestMessage & {
   id: string;
   model?: ModelType;
   tools?: ChatMessageTool[];
+  audio_url?: string;
 };
 
 export function createMessage(override: Partial<ChatMessage>): ChatMessage {
@@ -425,13 +426,13 @@ export const useChatStore = createPersistStore(
         }));
       },
 
-      onNewMessage(message: ChatMessage) {
-        get().updateCurrentSession((session) => {
+      onNewMessage(message: ChatMessage, targetSession: ChatSession) {
+        get().updateTargetSession(targetSession, (session) => {
           session.messages = session.messages.concat();
           session.lastUpdate = Date.now();
         });
-        get().updateStat(message);
-        get().summarizeSession();
+        get().updateStat(message, targetSession);
+        get().summarizeSession(false, targetSession);
         get().sortSessions();
         noticeCloudSync();
       },
@@ -471,10 +472,10 @@ export const useChatStore = createPersistStore(
         // get recent messages
         const recentMessages = get().getMessagesWithMemory();
         const sendMessages = recentMessages.concat(userMessage);
-        const messageIndex = get().currentSession().messages.length + 1;
+        const messageIndex = session.messages.length + 1;
 
         // save user's and bot's message
-        get().updateCurrentSession((session) => {
+        get().updateTargetSession(session, (session) => {
           const savedUserMessage = {
             ...userMessage,
             content: mContent,
@@ -495,7 +496,7 @@ export const useChatStore = createPersistStore(
             if (message) {
               botMessage.content = message;
             }
-            get().updateCurrentSession((session) => {
+            get().updateTargetSession(session, (session) => {
               session.messages = session.messages.concat();
             });
           },
@@ -503,13 +504,14 @@ export const useChatStore = createPersistStore(
             botMessage.streaming = false;
             if (message) {
               botMessage.content = message;
-              get().onNewMessage(botMessage);
+              botMessage.date = new Date().toLocaleString();
+              get().onNewMessage(botMessage, session);
             }
             ChatControllerPool.remove(session.id, botMessage.id);
           },
           onBeforeTool(tool: ChatMessageTool) {
             (botMessage.tools = botMessage?.tools || []).push(tool);
-            get().updateCurrentSession((session) => {
+            get().updateTargetSession(session, (session) => {
               session.messages = session.messages.concat();
             });
           },
@@ -519,7 +521,7 @@ export const useChatStore = createPersistStore(
                 tools[i] = { ...tool };
               }
             });
-            get().updateCurrentSession((session) => {
+            get().updateTargetSession(session, (session) => {
               session.messages = session.messages.concat();
             });
           },
@@ -534,7 +536,7 @@ export const useChatStore = createPersistStore(
             botMessage.streaming = false;
             userMessage.isError = !isAborted;
             botMessage.isError = !isAborted;
-            get().updateCurrentSession((session) => {
+            get().updateTargetSession(session, (session) => {
               session.messages = session.messages.concat();
             });
             ChatControllerPool.remove(
@@ -666,16 +668,19 @@ export const useChatStore = createPersistStore(
         set(() => ({ sessions }));
       },
 
-      resetSession() {
-        get().updateCurrentSession((session) => {
+      resetSession(session: ChatSession) {
+        get().updateTargetSession(session, (session) => {
           session.messages = [];
           session.memoryPrompt = "";
         });
       },
 
-      summarizeSession(refreshTitle: boolean = false) {
+      summarizeSession(
+        refreshTitle: boolean = false,
+        targetSession: ChatSession,
+      ) {
         const config = useAppConfig.getState();
-        const session = get().currentSession();
+        const session = targetSession;
         const modelConfig = session.mask.modelConfig;
         // skip summarize when using dalle3?
         if (isDalle3(modelConfig.model)) {
@@ -726,7 +731,8 @@ export const useChatStore = createPersistStore(
             },
             onFinish(message, responseRes) {
               if (responseRes?.status === 200) {
-                get().updateCurrentSession(
+                get().updateTargetSession(
+                  session,
                   (session) =>
                     (session.topic =
                       message.length > 0 ? trimTopic(message) : DEFAULT_TOPIC),
@@ -794,7 +800,7 @@ export const useChatStore = createPersistStore(
             onFinish(message, responseRes) {
               if (responseRes?.status === 200) {
                 console.log("[Memory] ", message);
-                get().updateCurrentSession((session) => {
+                get().updateTargetSession(session, (session) => {
                   session.lastSummarizeIndex = lastSummarizeIndex;
                   session.memoryPrompt = message; // Update the memory prompt for stored it in local storage
                 });
@@ -807,20 +813,22 @@ export const useChatStore = createPersistStore(
         }
       },
 
-      updateStat(message: ChatMessage) {
-        get().updateCurrentSession((session) => {
+      updateStat(message: ChatMessage, session: ChatSession) {
+        get().updateTargetSession(session, (session) => {
           session.stat.charCount += message.content.length;
           // TODO: should update chat count and word count
         });
       },
-
-      updateCurrentSession(updater: (session: ChatSession) => void) {
+      updateTargetSession(
+        targetSession: ChatSession,
+        updater: (session: ChatSession) => void,
+      ) {
         const sessions = get().sessions;
-        const index = get().currentSessionIndex;
+        const index = sessions.findIndex((s) => s.id === targetSession.id);
+        if (index < 0) return;
         updater(sessions[index]);
         set(() => ({ sessions }));
       },
-
       async clearAllData() {
         await indexedDBStorage.clear();
         localStorage.clear();
diff --git a/app/store/config.ts b/app/store/config.ts
index f14793c287a..4256eba925d 100644
--- a/app/store/config.ts
+++ b/app/store/config.ts
@@ -15,6 +15,7 @@ import {
   ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";
+import type { Voice } from "rt-client";
 
 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
 export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
@@ -90,12 +91,26 @@ export const DEFAULT_CONFIG = {
     voice: DEFAULT_TTS_VOICE,
     speed: 1.0,
   },
+
+  realtimeConfig: {
+    enable: false,
+    provider: "OpenAI" as ServiceProvider,
+    model: "gpt-4o-realtime-preview-2024-10-01",
+    apiKey: "",
+    azure: {
+      endpoint: "",
+      deployment: "",
+    },
+    temperature: 0.9,
+    voice: "alloy" as Voice,
+  },
 };
 
 export type ChatConfig = typeof DEFAULT_CONFIG;
 
 export type ModelConfig = ChatConfig["modelConfig"];
 export type TTSConfig = ChatConfig["ttsConfig"];
+export type RealtimeConfig = ChatConfig["realtimeConfig"];
 
 export function limitNumber(
   x: number,
diff --git a/app/utils.ts b/app/utils.ts
index 031016d9026..9bbbd436e92 100644
--- a/app/utils.ts
+++ b/app/utils.ts
@@ -254,21 +254,25 @@ export function getMessageImages(message: RequestMessage): string[] {
 export function isVisionModel(model: string) {
   // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using)
 
+  const excludeKeywords = ["claude-3-5-haiku-20241022"];
   const visionKeywords = [
     "vision",
-    "claude-3",
-    "gemini-1.5-pro",
-    "gemini-1.5-flash",
     "gpt-4o",
-    "gpt-4o-mini",
+    "claude-3",
+    "gemini-1.5",
+    "gemini-exp",
+    "learnlm",
+    "qwen-vl",
+    "qwen2-vl",
   ];
   const isGpt4Turbo =
     model.includes("gpt-4-turbo") && !model.includes("preview");
 
   return (
-    visionKeywords.some((keyword) => model.includes(keyword)) ||
-    isGpt4Turbo ||
-    isDalle3(model)
+    !excludeKeywords.some((keyword) => model.includes(keyword)) &&
+    (visionKeywords.some((keyword) => model.includes(keyword)) ||
+      isGpt4Turbo ||
+      isDalle3(model))
   );
 }
 
diff --git a/app/utils/chat.ts b/app/utils/chat.ts
index 9209b5da540..abace88e854 100644
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -138,7 +138,7 @@ export function uploadImage(file: Blob): Promise<string> {
   })
     .then((res) => res.json())
     .then((res) => {
-      console.log("res", res);
+      // console.log("res", res);
       if (res?.code == 0 && res?.data) {
         return res?.data;
       }
diff --git a/app/utils/model.ts b/app/utils/model.ts
index 0b62b53be09..a1b7df1b61e 100644
--- a/app/utils/model.ts
+++ b/app/utils/model.ts
@@ -37,6 +37,17 @@ const sortModelTable = (models: ReturnType<typeof collectModels>) =>
     }
   });
 
+/**
+ * Split `model@provider` (e.g. `gpt-4@OpenAI`) on its LAST `@`, so `@` inside a model name survives.
+ * @param modelWithProvider model name, optionally followed by `@` and a provider name
+ * @returns `[model, provider]` tuple; `provider` is `undefined` when there is no `@`
+ */
+export function getModelProvider(modelWithProvider: string): [string, string?] {
+  const at = modelWithProvider.lastIndexOf("@"); // linear scan; also correct when the name has line breaks
+  if (at < 0) return [modelWithProvider, undefined];
+  return [modelWithProvider.slice(0, at), modelWithProvider.slice(at + 1)];
+}
+
 export function collectModelTable(
   models: readonly LLMModel[],
   customModels: string,
@@ -79,10 +90,10 @@ export function collectModelTable(
         );
       } else {
         // 1. find model by name, and set available value
-        const [customModelName, customProviderName] = name.split("@");
+        const [customModelName, customProviderName] = getModelProvider(name);
         let count = 0;
         for (const fullName in modelTable) {
-          const [modelName, providerName] = fullName.split("@");
+          const [modelName, providerName] = getModelProvider(fullName);
           if (
             customModelName == modelName &&
             (customProviderName === undefined ||
@@ -102,7 +113,7 @@ export function collectModelTable(
         }
         // 2. if model not exists, create new model with available value
         if (count === 0) {
-          let [customModelName, customProviderName] = name.split("@");
+          let [customModelName, customProviderName] = getModelProvider(name);
           const provider = customProvider(
             customProviderName || customModelName,
           );
@@ -139,7 +150,7 @@ export function collectModelTableWithDefaultModel(
       for (const key of Object.keys(modelTable)) {
         if (
           modelTable[key].available &&
-          key.split("@").shift() == defaultModel
+          getModelProvider(key)[0] == defaultModel
         ) {
           modelTable[key].isDefault = true;
           break;
diff --git a/package.json b/package.json
index a036969ac66..f7b544bb2e5 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
     "@vercel/analytics": "^0.1.11",
     "@vercel/speed-insights": "^1.0.2",
     "axios": "^1.7.5",
+    "clsx": "^2.1.1",
     "emoji-picker-react": "^4.9.2",
     "fuse.js": "^7.0.0",
     "heic2any": "^0.0.4",
@@ -51,13 +52,14 @@
     "sass": "^1.59.2",
     "spark-md5": "^3.0.2",
     "use-debounce": "^9.0.4",
-    "zustand": "^4.3.8"
+    "zustand": "^4.3.8",
+    "rt-client": "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz"
   },
   "devDependencies": {
     "@tauri-apps/api": "^1.6.0",
     "@tauri-apps/cli": "1.5.11",
     "@testing-library/dom": "^10.4.0",
-    "@testing-library/jest-dom": "^6.6.2",
+    "@testing-library/jest-dom": "^6.6.3",
     "@testing-library/react": "^16.0.1",
     "@types/jest": "^29.5.14",
     "@types/js-yaml": "4.0.9",
diff --git a/public/audio-processor.js b/public/audio-processor.js
new file mode 100644
index 00000000000..4fae6ea1a6f
--- /dev/null
+++ b/public/audio-processor.js
@@ -0,0 +1,57 @@
+// @ts-nocheck
+/**
+ * AudioWorklet processor that batches incoming samples and posts them to
+ * the node's message port once at least `bufferSize` have accumulated.
+ */
+class AudioRecorderProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.isRecording = false;
+    this.bufferSize = 2400; // 100ms at 24kHz
+    this.currentBuffer = [];
+
+    // Recording is toggled by commands posted to this processor's port.
+    this.port.onmessage = (event) => {
+      switch (event.data.command) {
+        case "START_RECORDING":
+          this.isRecording = true;
+          break;
+        case "STOP_RECORDING":
+          this.isRecording = false;
+          // Flush the remainder; sendBuffer() is a no-op when empty.
+          this.sendBuffer();
+          break;
+      }
+    };
+  }
+
+  /** Post the pending samples as a Float32Array, then reset the buffer. */
+  sendBuffer() {
+    if (this.currentBuffer.length === 0) return;
+
+    this.port.postMessage({
+      eventType: "audio",
+      audioData: new Float32Array(this.currentBuffer),
+    });
+    this.currentBuffer = [];
+  }
+
+  /**
+   * Append channel 0 of the first input to the pending buffer while
+   * recording, flushing whenever `bufferSize` is reached. Always returns true.
+   */
+  process(inputs) {
+    const input = inputs[0];
+    if (input.length === 0 || !this.isRecording) return true;
+
+    for (const sample of input[0]) {
+      this.currentBuffer.push(sample);
+    }
+    if (this.currentBuffer.length >= this.bufferSize) {
+      this.sendBuffer();
+    }
+    return true;
+  }
+}
+
+registerProcessor("audio-recorder-processor", AudioRecorderProcessor);
diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json
index 415825b13f2..8f5dd4ab3da 100644
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -9,7 +9,7 @@
   },
   "package": {
     "productName": "NextChat",
-    "version": "2.15.6"
+    "version": "2.15.8"
   },
   "tauri": {
     "allowlist": {
diff --git a/test/model-provider.test.ts b/test/model-provider.test.ts
new file mode 100644
index 00000000000..41f14be026c
--- /dev/null
+++ b/test/model-provider.test.ts
@@ -0,0 +1,31 @@
+import { getModelProvider } from "../app/utils/model";
+
+describe("getModelProvider", () => {
+  test("should return model and provider when input contains '@'", () => {
+    // Simplest case: exactly one separator.
+    const [model, provider] = getModelProvider("model@provider");
+    expect(model).toBe("model");
+    expect(provider).toBe("provider");
+  });
+
+  test("should return model and undefined provider when input does not contain '@'", () => {
+    // Without a separator the whole input is the model name.
+    const [model, provider] = getModelProvider("model");
+    expect(model).toBe("model");
+    expect(provider).toBeUndefined();
+  });
+
+  test("should handle multiple '@' characters correctly", () => {
+    // Only the LAST '@' separates the provider; earlier ones stay in the model.
+    const [model, provider] = getModelProvider("model@provider@extra");
+    expect(model).toBe("model@provider");
+    expect(provider).toBe("extra");
+  });
+
+  test("should return empty model and undefined provider when input is empty", () => {
+    // An empty input yields an empty model name, not an empty provider.
+    const [model, provider] = getModelProvider("");
+    expect(model).toBe("");
+    expect(provider).toBeUndefined();
+  });
+});
diff --git a/yarn.lock b/yarn.lock
index 16b8b872ee9..ff257a3ef6f 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2114,10 +2114,10 @@
     lz-string "^1.5.0"
     pretty-format "^27.0.2"
 
-"@testing-library/jest-dom@^6.6.2":
-  version "6.6.2"
-  resolved "https://registry.yarnpkg.com/@testing-library/jest-dom/-/jest-dom-6.6.2.tgz#8186aa9a07263adef9cc5a59a4772db8c31f4a5b"
-  integrity sha512-P6GJD4yqc9jZLbe98j/EkyQDTPgqftohZF5FBkHY5BUERZmcf4HeO2k0XaefEg329ux2p21i1A1DmyQ1kKw2Jw==
+"@testing-library/jest-dom@^6.6.3":
+  version "6.6.3"
+  resolved "https://registry.yarnpkg.com/@testing-library/jest-dom/-/jest-dom-6.6.3.tgz#26ba906cf928c0f8172e182c6fe214eb4f9f2bd2"
+  integrity sha512-IteBhl4XqYNkM54f4ejhLRJiZNqcSCoXUOG2CPK7qbD322KjQozM4kHQOfkG2oln9b9HTYqs+Sae8vBATubxxA==
   dependencies:
     "@adobe/css-tools" "^4.4.0"
     aria-query "^5.0.0"
@@ -3189,6 +3189,11 @@ cliui@^8.0.1:
     strip-ansi "^6.0.1"
     wrap-ansi "^7.0.0"
 
+clsx@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/clsx/-/clsx-2.1.1.tgz#eed397c9fd8bd882bfb18deab7102049a2f32999"
+  integrity sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==
+
 co@^4.6.0:
   version "4.6.0"
   resolved "https://registry.npmmirror.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184"
@@ -7450,6 +7455,12 @@ robust-predicates@^3.0.0:
   resolved "https://registry.npmmirror.com/robust-predicates/-/robust-predicates-3.0.1.tgz#ecde075044f7f30118682bd9fb3f123109577f9a"
   integrity sha512-ndEIpszUHiG4HtDsQLeIuMvRsDnn8c8rYStabochtUeCvfuvNptb5TUbVD68LRAILPX7p9nqQGh4xJgn3EHS/g==
 
+"rt-client@https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz":
+  version "0.5.0"
+  resolved "https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.0/rt-client-0.5.0.tgz#abf2e9a850201e3571b8d36830f77bc52af3de9b"
+  dependencies:
+    ws "^8.18.0"
+
 run-parallel@^1.1.9:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
@@ -8493,9 +8504,9 @@ write-file-atomic@^4.0.2:
     imurmurhash "^0.1.4"
     signal-exit "^3.0.7"
 
-ws@^8.11.0:
+ws@^8.11.0, ws@^8.18.0:
   version "8.18.0"
-  resolved "https://registry.npmmirror.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
+  resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc"
   integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==
 
 xml-name-validator@^4.0.0: