
Implement Core functionality for Lora Adapters #679

Closed · wants to merge 72 commits

Commits (72)
28e86c6
Lora begins
yuslepukhin Jun 26, 2024
52fa32e
Rework span
yuslepukhin Jun 27, 2024
fb0670e
Lora begins
yuslepukhin Jun 26, 2024
beb630f
Start public API
yuslepukhin Jun 27, 2024
d006037
Merge branch 'yuslepukhin/implement_lora_adapters' of https://github.…
yuslepukhin Jun 27, 2024
5ad527b
Add param
yuslepukhin Jun 28, 2024
22d65cb
Add C++ API
yuslepukhin Jun 28, 2024
94e2c83
Add LoraManagement unit tests
yuslepukhin Jul 1, 2024
ad00a14
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 1, 2024
0a712a9
More tests
yuslepukhin Jul 1, 2024
11dc3f2
Add DeactiveAdapters
yuslepukhin Jul 2, 2024
7a98784
Add C and c++ API test
yuslepukhin Jul 2, 2024
db9b5fa
Adjust ExtraInputs.
yuslepukhin Jul 2, 2024
ed29925
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 3, 2024
dbdc369
Add thread-safety, setup for potential caching
yuslepukhin Jul 3, 2024
fa42f95
Added device copy to LoraAdapter
yuslepukhin Jul 3, 2024
a104b20
Add input addition verification
yuslepukhin Jul 3, 2024
7348901
Introduce utilities
yuslepukhin Jul 4, 2024
9164a96
Merge branch 'yuslepukhin/implement_lora_adapters' of https://github.…
yuslepukhin Jul 5, 2024
be1e6b0
Add automatic span construction from an array
yuslepukhin Jul 5, 2024
754d3e7
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 5, 2024
52b8aa5
Fix warnings
yuslepukhin Jul 5, 2024
1247461
Code issues
yuslepukhin Jul 5, 2024
40d939a
Flatbuffers begin
yuslepukhin Jul 10, 2024
89c00b3
Make flatbuffers tests run
yuslepukhin Jul 11, 2024
92bc527
Add python save_array_as_lora_parameter
yuslepukhin Jul 11, 2024
4e43b51
Add python test and readback in CXX
yuslepukhin Jul 11, 2024
24dc3c6
Add assert and cleanup
yuslepukhin Jul 12, 2024
297f894
Fix test issues
yuslepukhin Jul 12, 2024
f2b42bc
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 12, 2024
4513168
Address review comments
yuslepukhin Jul 12, 2024
a100cde
include mutex
yuslepukhin Jul 12, 2024
de5fab0
Build errors
yuslepukhin Jul 15, 2024
ea905fc
Address review comments
yuslepukhin Jul 15, 2024
c21e540
Introduce saving multiple params into the same fb file
yuslepukhin Jul 16, 2024
82a99de
Add convertion utility
yuslepukhin Jul 16, 2024
bc84e1d
Add import for lora helpers
yuslepukhin Jul 16, 2024
2fe6e6c
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 16, 2024
455ffdb
Merge branch 'yuslepukhin/implement_lora_adapters' into yuslepukhin/l…
yuslepukhin Jul 16, 2024
abf61b4
Add a tool to modify genai config and add adapters section
yuslepukhin Jul 16, 2024
5e58286
Work on config driven load
yuslepukhin Jul 17, 2024
41e3136
Fix up CUDA build
yuslepukhin Jul 18, 2024
eeba0c4
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 18, 2024
9cdd53f
Merge branch 'yuslepukhin/lora_params_ondisk' into yuslepukhin/implem…
yuslepukhin Jul 18, 2024
7905942
Fix merge
yuslepukhin Jul 18, 2024
d0f1346
Address security warnings
yuslepukhin Jul 18, 2024
07617c0
Fix stray include
yuslepukhin Jul 18, 2024
7dd7274
Address build shortcomings
yuslepukhin Jul 18, 2024
2992140
Clang format
yuslepukhin Jul 18, 2024
c77fb44
Clag format
yuslepukhin Jul 18, 2024
287693f
Remove redundant methods
yuslepukhin Jul 19, 2024
f8de77e
Run test coverage, remove some dead code. Cover base case.
yuslepukhin Jul 19, 2024
79bad70
Add missing checks
yuslepukhin Jul 19, 2024
08059ff
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 19, 2024
6a6cc3b
Address review comments
yuslepukhin Jul 22, 2024
e0088e3
Address build issues, refresh the test model
yuslepukhin Jul 22, 2024
4252975
Adjust file paths
yuslepukhin Jul 23, 2024
8814b47
Make it work end to end
yuslepukhin Jul 23, 2024
175ea54
Make FlatBuffers linkage public
yuslepukhin Jul 23, 2024
0bc4e3f
Move new test subfolder
yuslepukhin Jul 23, 2024
a77ab66
Add model
yuslepukhin Jul 23, 2024
ee6dcb9
Add fp16 model
yuslepukhin Jul 23, 2024
67bebd2
Adjust src
yuslepukhin Jul 23, 2024
57d4ef1
Adjust for ARM
yuslepukhin Jul 23, 2024
2318427
Create separate model copy and config to run on DML
yuslepukhin Jul 23, 2024
b902b70
Disable DML
yuslepukhin Jul 24, 2024
33a3ec2
Merge branch 'main' into yuslepukhin/implement_lora_adapters
yuslepukhin Jul 24, 2024
5dc65d7
Clang format
yuslepukhin Jul 24, 2024
c765a16
Address python related comments, correct faulty formatting for public…
yuslepukhin Jul 25, 2024
36f569c
Remove redundant linkage and includes
yuslepukhin Jul 25, 2024
3526f91
Rename python interface
yuslepukhin Jul 25, 2024
b936657
Correct function name
yuslepukhin Jul 25, 2024
25 changes: 25 additions & 0 deletions CMakeLists.txt
@@ -20,7 +20,26 @@ if(MSVC)
add_compile_definitions(_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
endif()

find_package(Patch)
if (WIN32 AND NOT Patch_FOUND)
# work around CI machines missing patch from the git install by falling back to the binary in this repo.
# replicate what happens in https://github.com/Kitware/CMake/blob/master/Modules/FindPatch.cmake but without
# the hardcoded suffixes in the path to the patch binary.
find_program(Patch_EXECUTABLE NAMES patch PATHS ${PROJECT_SOURCE_DIR}/external/git.Win32.2.41.03.patch)
if(Patch_EXECUTABLE)
set(Patch_FOUND 1)
if (NOT TARGET Patch::patch)
add_executable(Patch::patch IMPORTED)
set_property(TARGET Patch::patch PROPERTY IMPORTED_LOCATION ${Patch_EXECUTABLE})
endif()
endif()
endif()
if(Patch_FOUND)
message("Patch found: ${Patch_EXECUTABLE}")
endif()

include(cmake/external/onnxruntime_external_deps.cmake)

# All Global variables, including GLOB, for the top level CMakeLists.txt should be defined here
include(cmake/global_variables.cmake)
# Checking if CUDA is supported
@@ -32,6 +51,8 @@ include(cmake/check_dml.cmake)

include(cmake/cxx_standard.cmake)

include(cmake/genai_flatbuffers.cmake)

add_compile_definitions(BUILDING_ORT_GENAI_C)
if(MSVC)
# set updated value for __cplusplus macro instead of 199711L
@@ -72,6 +93,10 @@ else()
add_library(onnxruntime-genai-static STATIC ${generator_srcs})
endif()

target_link_libraries(onnxruntime-genai PRIVATE genai_flatbuffers)
target_link_libraries(onnxruntime-genai-static PRIVATE genai_flatbuffers)


target_include_directories(onnxruntime-genai PRIVATE ${ORT_HEADER_DIR})
target_include_directories(onnxruntime-genai-static PRIVATE ${ORT_HEADER_DIR})
target_include_directories(onnxruntime-genai PRIVATE ${onnxruntime_extensions_SOURCE_DIR}/include)
1 change: 1 addition & 0 deletions cmake/deps.txt
@@ -10,6 +10,7 @@
#not affect built binaries.
#
# NOTE: You must run deps_update_and_upload.py and generate_cgmanifest.py when ready to test your changes in a CI.
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6aa67a77f17a770960f604b727645b6f6a13
googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
Binary file added cmake/external/git.Win32.2.41.03.patch/patch.exe (binary contents not shown)
32 changes: 32 additions & 0 deletions cmake/external/onnxruntime_external_deps.cmake
@@ -19,6 +19,38 @@ endforeach()

message("Loading Dependencies ...")


# Flatbuffers
# We do not need to build flatc for iOS or Android Cross Compile
if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "FLATBUFFERS_BUILD_FLATC" FORCE)
endif()
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "FLATBUFFERS_BUILD_TESTS" FORCE)
set(FLATBUFFERS_INSTALL OFF CACHE BOOL "FLATBUFFERS_INSTALL" FORCE)
set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "FLATBUFFERS_BUILD_FLATHASH" FORCE)
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "FLATBUFFERS_BUILD_FLATLIB" FORCE)

if(NOT WIN32)
if(Patch_FOUND)
set(GENAI_FLATBUFFERS_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 <
${CMAKE_SOURCE_DIR}/cmake/patches/flatbuffers/flatbuffers.patch)
else()
set(GENAI_FLATBUFFERS_PATCH_COMMAND "")
endif()
else()
set(GENAI_FLATBUFFERS_PATCH_COMMAND "")
endif()

FetchContent_Declare(
flatbuffers
URL ${DEP_URL_flatbuffers}
URL_HASH SHA1=${DEP_SHA1_flatbuffers}
PATCH_COMMAND ${GENAI_FLATBUFFERS_PATCH_COMMAND}
FIND_PACKAGE_ARGS 23.5.9 NAMES Flatbuffers
)

onnxruntime_fetchcontent_makeavailable(flatbuffers)

if(ENABLE_PYTHON)
FetchContent_Declare(
pybind11_project
19 changes: 19 additions & 0 deletions cmake/genai_flatbuffers.cmake
@@ -0,0 +1,19 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

file(GLOB genai_flatbuffers_srcs CONFIGURE_DEPENDS
"${CMAKE_SOURCE_DIR}/src/flatbuffers/*.h"
"${CMAKE_SOURCE_DIR}/src/flatbuffers/*.cc"
)

add_library(genai_flatbuffers STATIC ${genai_flatbuffers_srcs})
target_link_libraries(genai_flatbuffers PUBLIC FlatBuffers::FlatBuffers)

target_include_directories(genai_flatbuffers PRIVATE ${ORT_HEADER_DIR})
target_link_directories(genai_flatbuffers PRIVATE ${ORT_LIB_DIR})

# Add dependency so the flatbuffers compiler is built if enabled
if (FLATBUFFERS_BUILD_FLATC)
add_dependencies(genai_flatbuffers flatc)
endif()

12 changes: 12 additions & 0 deletions cmake/patches/flatbuffers/flatbuffers.patch
@@ -0,0 +1,12 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3987eac9..5e5462f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -279,5 +279,5 @@
# Append FLATBUFFERS_CXX_FLAGS to CMAKE_CXX_FLAGS.
if(DEFINED FLATBUFFERS_CXX_FLAGS)
message(STATUS "extend CXX_FLAGS with ${FLATBUFFERS_CXX_FLAGS}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATBUFFERS_CXX_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATBUFFERS_CXX_FLAGS} -Wno-error=stringop-overflow")
endif()
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
30 changes: 30 additions & 0 deletions src/config.cpp
@@ -2,6 +2,7 @@
// Licensed under the MIT License.
#include "generators.h"
#include "json.h"

#include <fstream>
#include <sstream>

@@ -460,6 +461,31 @@ struct Search_Element : JSON::Element {
Config::Search& v_;
};

class LoraAdapters_Element : public JSON::Element {
public:
explicit LoraAdapters_Element(Config::LoraAdapters& v) noexcept : v_{v} {}

private:
JSON::Element& OnObject(std::string_view name) override {
if (current_adapter_ != name) {
current_adapter_ = name;
return *this;
}
throw JSON::unknown_value_error{};
}

void OnString(std::string_view name, std::string_view path) override {
if (name == "weights") {
v_.adapters.emplace(current_adapter_, path);
} else {
throw JSON::unknown_value_error{};
}
}

Config::LoraAdapters& v_;
std::string current_adapter_;
};

void SetSearchNumber(Config::Search& search, std::string_view name, double value) {
Search_Element(search).OnNumber(name, value);
}
@@ -499,12 +525,16 @@ struct Root_Element : JSON::Element {
if (name == "search") {
return search_element_;
}
if (name == "adapters") {
return lora_adapters_element_;
}
throw JSON::unknown_value_error{};
}

Config& config_;
Model_Element model_element_{config_.model};
Search_Element search_element_{config_.search};
LoraAdapters_Element lora_adapters_element_{config_.lora_adapters};
};

struct RootObject_Element : JSON::Element {
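For reference, here is a minimal genai_config.json fragment in the shape this parser accepts, inferred from LoraAdapters_Element above (one nested object per adapter, each carrying a "weights" path). The adapter names and file paths below are hypothetical:

```json
{
  "adapters": {
    "guanaco": { "weights": "adapters/guanaco.lora.fb" },
    "summarizer": { "weights": "adapters/summarizer.lora.fb" }
  }
}
```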
5 changes: 5 additions & 0 deletions src/config.h
@@ -126,6 +126,11 @@ struct Config {
int random_seed{-1}; // -1 = Seed with random device, otherwise use value to seed RNG
} search;

struct LoraAdapters {
// Stores adapter name to file name mapping
std::unordered_map<std::string, std::string> adapters;
} lora_adapters;

void AddMapping(const std::string& nominal_name, const std::string& graph_name);
// Returns graph name and true if the nominal name is found in the mapping
// otherwise returns the nominal name and false
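A minimal consumption sketch for the new struct — assuming Config lives in the Generators namespace as elsewhere in this codebase, and that a populated instance is at hand:

```cpp
#include <cstdio>

#include "config.h"

// Hypothetical helper: list the adapter-name -> weights-file mapping
// parsed from the "adapters" section of genai_config.json.
void PrintAdapters(const Generators::Config& config) {
  for (const auto& [adapter_name, weights_file] : config.lora_adapters.adapters) {
    std::printf("adapter %s -> %s\n", adapter_name.c_str(), weights_file.c_str());
  }
}
```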
17 changes: 17 additions & 0 deletions src/flatbuffers.h
@@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once
#if defined(__GNUC__)
#pragma GCC diagnostic push

#ifdef HAS_SHORTEN_64_TO_32
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
#endif
#endif

#include "flatbuffers/flatbuffers.h"

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
63 changes: 63 additions & 0 deletions src/flatbuffers/flatbuffers_utils.cc
@@ -0,0 +1,63 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "flatbuffers_utils.h"
#include "schema/genai_lora.fbs.h"
#include "../../src/models/onnxruntime_api.h"

#include "../models/onnxruntime_api.h"

namespace Generators {
namespace lora_parameters {
namespace utils {

bool IsGenAiLoraFormatModelBytes(const void* bytes, size_t num_bytes) {
return num_bytes > 8 && // check buffer is large enough to contain identifier so we don't read random memory
ParametersBufferHasIdentifier(bytes);
}

flatbuffers::Offset<flatbuffers::String> SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder,
bool has_string, const std::string& src) {
if (has_string) return builder.CreateString(src);

// If the string does not exist, return 0 (the string does not exist in flatbuffer)
return 0;
}

void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string) {
if (fbs_string) {
dst = fbs_string->str();
}
}

void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
Generators::lora_parameters::TensorDataType data_type, std::span<const int64_t> shape,
std::span<const uint8_t> data,
flatbuffers::Offset<Generators::lora_parameters::Param>& fbs_tensor) {
auto name_str = (name.empty()) ? 0 : flat_builder.CreateString(name.data(), name.size());
auto shape_vec = flat_builder.CreateVector(shape.data(), shape.size());
auto data_vec = flat_builder.CreateVector(data.data(), data.size());

fbs_tensor = CreateParam(flat_builder, name_str, shape_vec, data_type, data_vec);
}

std::pair<std::string, std::unique_ptr<OrtValue>> CreateOrtValueOverFlatBufferLoraParameter(
const Generators::lora_parameters::Param& tensor) {
std::string name;
LoadStringFromLoraFormat(name, tensor.name());

const auto data_type = tensor.data_type();

std::span<const int64_t> shape_span(tensor.dims()->data(), tensor.dims()->size());

auto mem_info = OrtMemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
auto ort_value =
OrtValue::CreateTensor(*mem_info, const_cast<uint8_t*>(tensor.raw_data()->data()),
static_cast<size_t>(tensor.raw_data()->size()), shape_span,
static_cast<ONNXTensorElementDataType>(data_type));
return std::make_pair(std::move(name), std::move(ort_value));
}

} // namespace utils
} // namespace lora_parameters
} // namespace Generators
59 changes: 59 additions & 0 deletions src/flatbuffers/flatbuffers_utils.h
@@ -0,0 +1,59 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "../flatbuffers.h"
#include "../span.h"

#include "schema/genai_lora.fbs.h"

#include <string>
#include <string_view>
#include <unordered_map>

struct OrtValue;

namespace Generators {
namespace lora_parameters {
namespace utils {

// Will only create string in flatbuffers when has_string is true
flatbuffers::Offset<flatbuffers::String> SaveStringToLoraFormat(flatbuffers::FlatBufferBuilder& builder,
bool has_string, const std::string& src);

void LoadStringFromLoraFormat(std::string& dst, const flatbuffers::String* fbs_string);

/// <summary>
/// Serializes tensor data into flatbuffer
/// </summary>
/// <param name="flat_builder"></param>
/// <param name="name">parameter name</param>
/// <param name="doc">doc, optional</param>
/// <param name="data_type"></param>
/// <param name="shape"></param>
/// <param name="data"></param>
/// <param name="fbs_tensor">output offset</param>
void SaveLoraParameter(flatbuffers::FlatBufferBuilder& flat_builder, std::string_view name,
Generators::lora_parameters::TensorDataType data_type,
std::span<const int64_t> shape, std::span<const uint8_t> data,
flatbuffers::Offset<Generators::lora_parameters::Param>& fbs_tensor);

/// <summary>
/// Create an OrtValue on top of the flatbuffer tensor
/// No copying of data is done here. The caller is responsible for managing the lifetime of flatbuffer
/// structures.
///
/// In this scenario, one can memory map the entire flatbuffer tensor data into OrtValue without copying.
/// </summary>
/// <param name="tensor"></param>
/// <returns></returns>
std::pair<std::string, std::unique_ptr<OrtValue>> CreateOrtValueOverFlatBufferLoraParameter(
const Generators::lora_parameters::Param& tensor);

// Check whether the given bytes carry the file identifier for lora parameters
bool IsGenAiLoraFormatModelBytes(const void* bytes, size_t num_bytes);

} // namespace utils
} // namespace lora_parameters
} // namespace Generators
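To show how these helpers compose, here is a hedged round-trip sketch. It assumes the generated genai_lora.fbs API exposes a TensorDataType::FLOAT enumerator, and it finishes a lone Param as the buffer root purely for demonstration (in the real format the root is a Parameters table that owns a vector of Param entries):

```cpp
#include <cstdint>
#include <vector>

#include "flatbuffers_utils.h"

void RoundTripSketch() {
  namespace lp = Generators::lora_parameters;
  flatbuffers::FlatBufferBuilder builder;

  const std::vector<int64_t> shape{2, 4};
  const std::vector<uint8_t> data(2 * 4 * sizeof(float), 0);  // zero-filled fp32 payload

  // Serialize one named parameter into the builder.
  flatbuffers::Offset<lp::Param> fbs_param;
  lp::utils::SaveLoraParameter(builder, "model.layers.0.attn.lora_A",
                               lp::TensorDataType::FLOAT,  // assumed enumerator name
                               shape, data, fbs_param);
  builder.Finish(fbs_param);  // demo only: a single Param as root

  // Map an OrtValue over the serialized bytes without copying.
  const auto* param = flatbuffers::GetRoot<lp::Param>(builder.GetBufferPointer());
  auto [name, ort_value] = lp::utils::CreateOrtValueOverFlatBufferLoraParameter(*param);
  // `builder` must outlive `ort_value`: the tensor aliases the flatbuffer memory.
}
```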
33 changes: 33 additions & 0 deletions src/flatbuffers/lora_format_version.h
@@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <algorithm>
#include <array>

namespace Generators {
namespace lora_parameters {

// The current format version for saving lora parameters in flatbuffers.
// Once this version is updated, kSupportedLoraFormatVersions in IsLoraFormatVersionSupported
// below will also need to be updated.
// See src/flatbuffers/schema/README.md for more details on versioning.
// Version 1 - history begins
constexpr const int kLoraFormatVersion = 1;

// Check if the given lora format version is supported in this build
inline bool IsLoraFormatVersionSupported(const int lora_format_version) {
// The lora format versions we will support in this build
// This may contain more versions than the kLoraFormatVersion, based on the compatibilities
constexpr std::array<int, 1U> kSupportedLoraFormatVersions{
kLoraFormatVersion,
};

const auto it =
std::find(kSupportedLoraFormatVersions.begin(), kSupportedLoraFormatVersions.end(), lora_format_version);
return it != kSupportedLoraFormatVersions.cend();
}

} // namespace lora_parameters
} // namespace Generators
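A hedged sketch of how a loader might combine this check with the identifier test from flatbuffers_utils.h — GetParameters and its version() accessor are assumed names from the generated genai_lora.fbs API and do not appear in this diff:

```cpp
#include <cstddef>
#include <stdexcept>

void ValidateLoraBuffer(const void* bytes, size_t num_bytes) {
  using namespace Generators::lora_parameters;
  if (!utils::IsGenAiLoraFormatModelBytes(bytes, num_bytes))
    throw std::runtime_error("buffer lacks the GenAI LoRA file identifier");
  const auto* params = GetParameters(bytes);  // assumed generated root accessor
  if (!IsLoraFormatVersionSupported(params->version()))  // version() assumed
    throw std::runtime_error("unsupported LoRA parameters format version");
}
```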