diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..dd84ea78 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..bbcbbe7d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/Cargo.lock b/Cargo.lock index a5877bbf..34138565 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,12 +39,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -86,18 +80,19 @@ dependencies = [ [[package]] name = "async-openai" -version = "0.21.0" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007f03f7e27271451af57ced242d6adfa04204d1275a91ec0952bf441fd8d102" +checksum = "a851fe1ea66feec2e15ba280823ee3e6a4ede4e7afde576fe312dfb012965a91" dependencies = [ "async-convert", "backoff", "base64 0.22.1", "bytes", "derive_builder", + "eventsource-stream", "futures", "rand", - "reqwest 0.12.4", + "reqwest", "reqwest-eventsource", "secrecy", "serde", @@ -227,9 +222,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -308,33 +303,10 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", - "serde", "wasm-bindgen", "windows-targets 0.52.5", ] -[[package]] -name = "code_ops" -version = "0.1.0" -dependencies = [ - "anyhow", - "cc", - "derive_builder", - "indoc", - "infrastructure", - "serde", - "serde_json", - "strum", - "strum_macros", - "tree-sitter", - "tree-sitter-javascript", - "tree-sitter-python", - "tree-sitter-ruby", - "tree-sitter-rust", - "tree-sitter-typescript", - "uuid", -] - [[package]] name = "combine" version = "4.6.7" @@ -374,15 +346,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -443,15 +406,6 @@ dependencies = [ "syn", ] -[[package]] -name = "deranged" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] - [[package]] name = "derive_builder" version = "0.20.0" @@ -495,10 +449,15 @@ dependencies = [ ] [[package]] -name = "dotenvy" -version = "0.15.7" +name = "displaydoc" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "either" @@ -506,31 +465,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" -[[package]] -name = "encoding_rs" -version = "0.8.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" -dependencies = [ - "cfg-if", -] - [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - [[package]] name = "eventsource-stream" version = "0.2.3" @@ -542,12 +482,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" - [[package]] name = "flate2" version = "1.0.30" @@ -564,21 +498,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -690,10 +609,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", - "wasm-bindgen", ] [[package]] @@ -702,12 +619,6 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "globset" version = "0.4.14" @@ -717,8 +628,8 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata", + "regex-syntax", ] [[package]] @@ -770,10 +681,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "heck" @@ -832,12 +739,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http 1.1.0", "http-body 1.0.0", "pin-project-lite", @@ -845,9 +752,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "d0e7a4dd27b9476dc40cb050d3632d3bba3a70ddbff012285f7f8559a1e7e545" [[package]] name = "httpdate" @@ -899,20 +806,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http 0.2.12", - "hyper 0.14.29", - "rustls 0.21.12", - "tokio", - "tokio-rustls 0.24.1", -] - [[package]] name = "hyper-rustls" version = "0.26.0" @@ -923,12 +816,10 @@ dependencies = [ "http 1.1.0", "hyper 1.3.1", "hyper-util", - "log", - "rustls 0.22.4", - "rustls-native-certs 0.7.0", + "rustls", "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls", "tower-service", ] @@ -944,35 +835,6 @@ dependencies = [ "tokio-io-timeout", ] -[[package]] -name = "hyper-timeout" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" -dependencies = [ - "hyper 1.3.1", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper 1.3.1", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.5" @@ -1016,6 +878,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -1024,12 +1004,14 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -1042,37 +1024,12 @@ dependencies = [ "globset", "log", "memchr", - "regex-automata 0.4.7", + "regex-automata", "same-file", "walkdir", "winapi-util", ] -[[package]] -name = "indexing" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-stream", - "async-trait", - "chrono", - "code_ops", - "futures-util", - "ignore", - "indoc", - "infrastructure", - "itertools 0.13.0", - "qdrant-client", - "redis", - "serde", - "serde_json", - "text-splitter", - "tokio", - "tokio-stream", - "tracing", - "uuid", -] - [[package]] name = "indexmap" version = "1.9.3" @@ -1099,39 +1056,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "infrastructure" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-openai", - "async-trait", - "base64 0.22.1", - "dotenvy", - "itertools 0.13.0", - "jsonwebtoken", - "octocrab", - "once_cell", - "opentelemetry 0.23.0", - "opentelemetry-aws", - "opentelemetry-otlp", - "opentelemetry_sdk 0.23.0", - "qdrant-client", - "rand", - "reqwest 0.12.4", - "serde", - "serde_json", - "strum", - "strum_macros", - "tokio", - "tokio-util", - "tracing", - "tracing-opentelemetry", - "tracing-subscriber", - "url", - "uuid", -] - [[package]] name = "instant" version = "0.1.13" @@ -1147,16 +1071,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" -[[package]] -name = "iri-string" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f5f6c2df22c009ac44f6f1499308e7a3ac7ba42cd2378475cc691510e1eef1b" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "itertools" version = "0.12.1" @@ -1190,27 +1104,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonwebtoken" -version = "9.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ae10193d25051e74945f1ea2d0b42e03cc3b890f7e4cc5faa44997d808193f" -dependencies = [ - "base64 0.21.7", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.155" @@ -1218,10 +1111,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] -name = "linux-raw-sys" -version = "0.4.14" +name = "litemap" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" [[package]] name = "lock_api" @@ -1239,15 +1132,6 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - [[package]] name = "matchit" version = "0.7.3" @@ -1302,23 +1186,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "nom" version = "7.1.3" @@ -1330,47 +1197,12 @@ dependencies = [ ] [[package]] -name = "nu-ansi-term" -version = "0.46.0" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num-bigint" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", + "autocfg", ] [[package]] @@ -1385,243 +1217,25 @@ dependencies = [ [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] -[[package]] -name = "octocrab" -version = "0.38.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68a8a3df00728324ad654ecd1ed449a60157c55b7ff8c109af3a35989687c367" -dependencies = [ - "arc-swap", - "async-trait", - "base64 0.22.1", - "bytes", - "cfg-if", - "chrono", - "either", - "futures", - "futures-util", - "http 1.1.0", - "http-body 1.0.0", - "http-body-util", - "hyper 1.3.1", - "hyper-rustls 0.26.0", - "hyper-timeout 0.5.1", - "hyper-util", - "jsonwebtoken", - "once_cell", - "percent-encoding", - "pin-project", - "secrecy", - "serde", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", - "snafu", - "tokio", - "tower", - "tower-http", - "tracing", - "url", -] - [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "openssl" -version = "0.10.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" -dependencies = [ - "bitflags 2.5.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "openssl-probe" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-sys" -version = "0.9.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "opentelemetry" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" -dependencies = [ - "futures-core", - "futures-sink", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", - "urlencoding", -] - -[[package]] -name = "opentelemetry" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b69a91d4893e713e06f724597ad630f1fa76057a5e1026c0ca67054a9032a76" -dependencies = [ - "futures-core", - "futures-sink", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", -] - -[[package]] -name = "opentelemetry-aws" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42c915961c059be65af3be9aeaedb3f8930e1e9590e26e5f23b1919e90d1ed7d" -dependencies = [ - "once_cell", - "opentelemetry 0.22.0", - "opentelemetry_sdk 0.22.1", -] - -[[package]] -name = "opentelemetry-http" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0ba633e55c5ea6f431875ba55e71664f2fa5d3a90bd34ec9302eecc41c865dd" -dependencies = [ - "async-trait", - "bytes", - "http 0.2.12", - "opentelemetry 0.23.0", - "reqwest 0.11.27", -] - -[[package]] -name = "opentelemetry-otlp" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a94c69209c05319cdf7460c6d4c055ed102be242a0a6245835d7bc42c6ec7f54" -dependencies = [ - "async-trait", - "futures-core", - "http 0.2.12", - "opentelemetry 0.23.0", - "opentelemetry-http", - "opentelemetry-proto", - "opentelemetry_sdk 0.23.0", - "prost", - "reqwest 0.11.27", - "thiserror", - "tokio", - "tonic", -] - -[[package]] -name = "opentelemetry-proto" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "984806e6cf27f2b49282e2a05e288f30594f3dbc74eb7a6e99422bc48ed78162" -dependencies = [ - "opentelemetry 0.23.0", - "opentelemetry_sdk 0.23.0", - "prost", - "tonic", -] - -[[package]] -name = "opentelemetry_sdk" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" -dependencies = [ - "async-trait", - "crossbeam-channel", - "futures-channel", - "futures-executor", - "futures-util", - "once_cell", - "opentelemetry 0.22.0", - "ordered-float", - "percent-encoding", - "rand", - "thiserror", -] - -[[package]] -name = "opentelemetry_sdk" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae312d58eaa90a82d2e627fd86e075cf5230b3f11794e2ed74199ebbe572d4fd" -dependencies = [ - "async-trait", - "futures-channel", - "futures-executor", - "futures-util", - "glob", - "lazy_static", - "once_cell", - "opentelemetry 0.23.0", - "ordered-float", - "percent-encoding", - "rand", - "thiserror", - "tokio", - "tokio-stream", -] - -[[package]] -name = "ordered-float" -version = "4.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" -dependencies = [ - "num-traits", -] - -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "parking_lot" version = "0.12.3" @@ -1645,16 +1259,6 @@ dependencies = [ "windows-targets 0.52.5", ] -[[package]] -name = "pem" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" -dependencies = [ - "base64 0.22.1", - "serde", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1693,18 +1297,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkg-config" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1773,7 +1365,7 @@ dependencies = [ "futures-util", "prost", "prost-types", - "reqwest 0.12.4", + "reqwest", "serde", "serde_json", "tonic", @@ -1833,16 +1425,16 @@ dependencies = [ "itoa", "percent-encoding", "pin-project-lite", - "rustls 0.22.4", - "rustls-native-certs 0.7.0", - "rustls-pemfile 2.1.2", + "rustls", + "rustls-native-certs", + "rustls-pemfile", "rustls-pki-types", "ryu", "sha1_smol", "socket2", "tokio", "tokio-retry", - "tokio-rustls 0.25.0", + "tokio-rustls", "tokio-util", "url", ] @@ -1864,17 +1456,8 @@ checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -1885,62 +1468,15 @@ checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.4", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "base64 0.21.7", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2 0.3.26", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.29", - "hyper-rustls 0.24.2", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls 0.21.12", - "rustls-native-certs 0.6.3", - "rustls-pemfile 1.0.4", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "system-configuration", - "tokio", - "tokio-rustls 0.24.1", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "winreg 0.50.0", -] - [[package]] name = "reqwest" version = "0.12.4" @@ -1949,7 +1485,6 @@ checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" dependencies = [ "base64 0.22.1", "bytes", - "encoding_rs", "futures-core", "futures-util", "h2 0.4.5", @@ -1957,30 +1492,26 @@ dependencies = [ "http-body 1.0.0", "http-body-util", "hyper 1.3.1", - "hyper-rustls 0.26.0", - "hyper-tls", + "hyper-rustls", "hyper-util", "ipnet", "js-sys", "log", "mime", "mime_guess", - "native-tls", "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.22.4", - "rustls-native-certs 0.7.0", - "rustls-pemfile 2.1.2", + "rustls", + "rustls-native-certs", + "rustls-pemfile", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", - "tokio-native-tls", - "tokio-rustls 0.25.0", + "tokio-rustls", "tokio-util", "tower-service", "url", @@ -1989,7 +1520,7 @@ dependencies = [ "wasm-streams", "web-sys", "webpki-roots", - "winreg 0.52.0", + "winreg", ] [[package]] @@ -2004,7 +1535,7 @@ dependencies = [ "mime", "nom", "pin-project-lite", - "reqwest 0.12.4", + "reqwest", "thiserror", ] @@ -2029,31 +1560,6 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags 2.5.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki 0.101.7", - "sct", -] - [[package]] name = "rustls" version = "0.22.4" @@ -2063,23 +1569,11 @@ dependencies = [ "log", "ring", "rustls-pki-types", - "rustls-webpki 0.102.4", + "rustls-webpki", "subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework", -] - [[package]] name = "rustls-native-certs" version = "0.7.0" @@ -2087,21 +1581,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" dependencies = [ "openssl-probe", - "rustls-pemfile 2.1.2", + "rustls-pemfile", "rustls-pki-types", "schannel", "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pemfile" version = "2.1.2" @@ -2118,16 +1603,6 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustls-webpki" version = "0.102.4" @@ -2175,16 +1650,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "secrecy" version = "0.8.0" @@ -2249,16 +1714,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_path_to_error" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" -dependencies = [ - "itoa", - "serde", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2277,15 +1732,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -2295,54 +1741,21 @@ dependencies = [ "libc", ] -[[package]] -name = "simple_asn1" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror", - "time", -] - [[package]] name = "slab" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "snafu" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418b8136fec49956eba89be7da2847ec1909df92a9ae4178b5ff0ff092c8d95e" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a4812a669da00d17d8266a0439eddcacbc88b17f732f927e52eeb9d196f7fb5" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", + "autocfg", ] +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "socket2" version = "0.5.7" @@ -2359,6 +1772,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -2393,6 +1812,37 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "swiftide" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-openai", + "async-stream", + "async-trait", + "chrono", + "derive_builder", + "futures-util", + "ignore", + "indoc", + "itertools 0.13.0", + "qdrant-client", + "redis", + "serde", + "serde_json", + "strum", + "strum_macros", + "text-splitter", + "tokio", + "tracing", + "tree-sitter", + "tree-sitter-javascript", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-typescript", +] + [[package]] name = "syn" version = "2.0.66" @@ -2411,36 +1861,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "tempfile" -version = "3.10.1" +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ - "cfg-if", - "fastrand", - "rustix", - "windows-sys 0.52.0", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -2482,61 +1910,15 @@ dependencies = [ ] [[package]] -name = "thread_local" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "time" -version = "0.3.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "time-macros" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.38.0" @@ -2577,16 +1959,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-retry" version = "0.3.0" @@ -2598,23 +1970,13 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls 0.21.12", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ - "rustls 0.22.4", + "rustls", "rustls-pki-types", "tokio", ] @@ -2638,12 +2000,8 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", - "futures-io", "futures-sink", - "futures-util", - "hashbrown 0.14.5", "pin-project-lite", - "slab", "tokio", ] @@ -2663,15 +2021,15 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "hyper 0.14.29", - "hyper-timeout 0.4.1", + "hyper-timeout", "percent-encoding", "pin-project", "prost", - "rustls-native-certs 0.7.0", - "rustls-pemfile 2.1.2", + "rustls-native-certs", + "rustls-pemfile", "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls", "tokio-stream", "tower", "tower-layer", @@ -2699,26 +2057,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tower-http" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" -dependencies = [ - "bitflags 2.5.0", - "bytes", - "futures-util", - "http 1.1.0", - "http-body 1.0.0", - "http-body-util", - "iri-string", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tower-layer" version = "0.3.2" @@ -2761,67 +2099,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-opentelemetry" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f68803492bf28ab40aeccaecc7021096bd256baf7ca77c3d425d89b35a7be4e4" -dependencies = [ - "js-sys", - "once_cell", - "opentelemetry 0.23.0", - "opentelemetry_sdk 0.23.0", - "smallvec", - "tracing", - "tracing-core", - "tracing-log", - "tracing-subscriber", - "web-time", -] - -[[package]] -name = "tracing-serde" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" -dependencies = [ - "serde", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex", - "serde", - "serde_json", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", - "tracing-serde", ] [[package]] @@ -2899,27 +2176,12 @@ dependencies = [ "version_check", ] -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.11.0" @@ -2934,43 +2196,26 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", "idna", "percent-encoding", - "serde", -] - -[[package]] -name = "urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - -[[package]] -name = "uuid" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" -dependencies = [ - "getrandom", - "serde", ] [[package]] -name = "valuable" -version = "0.1.0" +name = "utf16_iter" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" [[package]] -name = "vcpkg" -version = "0.2.15" +name = "utf8_iter" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "version_check" @@ -3092,16 +2337,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "webpki-roots" version = "0.26.2" @@ -3111,22 +2346,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.8" @@ -3136,12 +2355,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.52.0" @@ -3292,22 +2505,48 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "winreg" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" dependencies = [ "cfg-if", "windows-sys 0.48.0", ] [[package]] -name = "winreg" -version = "0.52.0" +name = "write16" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" dependencies = [ - "cfg-if", - "windows-sys 0.48.0", + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", ] [[package]] @@ -3330,8 +2569,51 @@ dependencies = [ "syn", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index fb24cca3..7d011ae8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,83 +3,5 @@ incremental = true debug = 0 [workspace] -members = ["crates/*"] +members = ["swiftide"] resolver = "2" - -[workspace.package] -# Minimum Rust version -rust-version = "1.78" - -[workspace.dependencies] -serde = { version = "1.0.203", features = ["derive"] } -serde_json = "1.0.117" -serde_yaml = "0.9.34" -anyhow = { version = "1.0.86", features = ["backtrace"] } -tokio = { version = "1.38.0", features = ["full"] } -tokio-util = { version = "0.7.11", features = ["full"] } -async-openai = "0.21.0" -async-trait = "0.1.80" -url = "2.5.0" -indoc = "2.0.5" -reqwest = { version = "0.12.4" } -futures-util = "0.3.30" -once_cell = "1.19.0" -dotenvy = "0.15.7" -regex = "1.10.4" -rand = "0.8.5" -askama = "0.12.1" -uuid = { version = "1.8.0", features = ["v4", "serde"] } -shell-escape = "0.1.5" -octocrab = "0.38.0" -strum = { version = "0.26.2", features = ["derive"] } -strum_macros = "0.26.2" -redis = { version = "0.25.4", features = [ - "aio", - "tokio-comp", - "connection-manager", - "tokio-rustls-comp", -] } -derive_builder = "0.20.0" - -# Web -axum = { version = "0.7.5", features = [ - "multipart", - "tracing", - "ws", - "macros", -] } -tower-http = { version = "0.5.2", features = ["trace", "limit"] } - -# Testing -test-log = { version = "0.2.16", default-features = false, features = [ - "trace", -] } -wiremock = { version = "0.6.0" } -mockall = { version = "0.12.1" } - -# CLI -clap = { version = "4.5.4", features = ["derive"] } -spinners = { version = "4.1.1" } - -# Websockets -tokio-tungstenite = { version = "0.21.0" } - -# Telemetry -opentelemetry = "0.23.0" -opentelemetry_sdk = { version = "0.23.0", features = ["rt-tokio"] } -opentelemetry-stdout = { version = "0.4.0", features = ["trace"] } -tracing-opentelemetry = "0.24.0" -opentelemetry-otlp = { version = "0.16.0", features = [ - "http-proto", - "reqwest-client", - "reqwest-rustls", -] } -tracing = { version = "0.1.40", features = ["log"] } -tracing-subscriber = { version = "0.3.18", features = [ - "env-filter", - "ansi", - "json", -] } -itertools = { version = "0.13.0" } -qdrant-client = { version = "1.9.0" } -chrono = { version = "0.4.38" } diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..0dac1474 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Bosun.ai + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crates/code_ops/Cargo.toml b/crates/code_ops/Cargo.toml deleted file mode 100644 index 471e174b..00000000 --- a/crates/code_ops/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "code_ops" -version = "0.1.0" -edition = "2021" -rust-version.workspace = true - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -tree-sitter = "0.22.5" -tree-sitter-rust = "0.21.0" -indoc = { workspace = true } -anyhow = { workspace = true } -infrastructure = { path = "../infrastructure/" } -tree-sitter-python = "0.21.0" -tree-sitter-ruby = "0.21.0" -tree-sitter-typescript = "0.21.1" -tree-sitter-javascript = "0.21.3" -derive_builder = { workspace = true } -strum = { workspace = true } -strum_macros = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -uuid = { workspace = true } - -[build-dependencies] -cc = "1.0.95" diff --git a/crates/code_ops/src/code_parser.rs b/crates/code_ops/src/code_parser.rs deleted file mode 100644 index 10abf6bc..00000000 --- a/crates/code_ops/src/code_parser.rs +++ /dev/null @@ -1,80 +0,0 @@ -#![allow(dead_code)] -use crate::SupportedLanguages; -use anyhow::{Context as _, Result}; -use tree_sitter::{Node, Parser, Tree}; - -use crate::supported_language_to_tree_sitter; - -pub struct CodeParser { - parser: Parser, -} - -pub struct CodeNode { - // parent: Option<Box<CodeNode<'a>>>, - children: Vec<CodeNode>, - pub kind: String, - pub grammar_name: String, - pub name: String, -} - -pub struct CodeTree { - pub root_node: CodeNode, - ts_tree: Tree, -} - -impl CodeTree { - // Walks over the tree tracking the depth of the node, allowing to call a function with the - // depth and the node - #[allow(clippy::only_used_in_recursion)] - pub fn walk<T>(&self, node: &CodeNode, depth: usize, f: &impl Fn(usize, &CodeNode) -> T) -> T { - let res = f(depth, node); - for child in &node.children { - self.walk(child, depth + 1, f); - } - res - } -} - -impl CodeParser { - pub fn try_new(language: SupportedLanguages) -> Result<Self> { - let mut parser = Parser::new(); - parser.set_language(&supported_language_to_tree_sitter(&language))?; - - Ok(Self { parser }) - } - - pub fn parse(&mut self, code: &str) -> Result<CodeTree> { - let tree = self.parser.parse(code, None).context("No nodes found")?; - - let parsed_root_node = parse_node(tree.root_node(), code); - let code_tree = CodeTree { - ts_tree: tree, - root_node: parsed_root_node, - }; - Ok(code_tree) - } -} - -fn parse_node(node: Node, code: &str) -> CodeNode { - let mut children = vec![]; - - // Assume that unnamed nodes have no children - // It's a rought world - let end_byte = node - .child(0) - .map(|n| n.start_byte()) - .unwrap_or_else(|| node.end_byte()); - - for child in node.named_children(&mut node.walk()) { - let child_node = parse_node(child, code); - children.push(child_node); - } - - CodeNode { - // ts_node: node, - grammar_name: node.grammar_name().to_string(), - kind: node.kind().to_string(), - name: code[node.start_byte()..end_byte].to_string(), - children, - } -} diff --git a/crates/code_ops/src/lib.rs b/crates/code_ops/src/lib.rs deleted file mode 100644 index b4db1f1f..00000000 --- a/crates/code_ops/src/lib.rs +++ /dev/null @@ -1,17 +0,0 @@ -mod code_parser; -mod code_splitter; -mod supported_languages; - -pub use supported_languages::SupportedLanguages; -use tree_sitter::Language; -pub use {code_parser::CodeParser, code_splitter::ChunkSize, code_splitter::CodeSplitter}; - -pub(crate) fn supported_language_to_tree_sitter(language: &SupportedLanguages) -> Language { - match language { - SupportedLanguages::Rust => tree_sitter_rust::language(), - SupportedLanguages::Python => tree_sitter_python::language(), - SupportedLanguages::Typescript => tree_sitter_typescript::language_typescript(), - SupportedLanguages::Javascript => tree_sitter_javascript::language(), - SupportedLanguages::Ruby => tree_sitter_ruby::language(), - } -} diff --git a/crates/code_ops/src/supported_languages.rs b/crates/code_ops/src/supported_languages.rs deleted file mode 100644 index 44bcc903..00000000 --- a/crates/code_ops/src/supported_languages.rs +++ /dev/null @@ -1,120 +0,0 @@ -use serde::{Deserialize, Serialize}; -pub use std::str::FromStr; -use strum::EnumString; - -#[derive( - Deserialize, - Serialize, - Debug, - PartialEq, - EnumString, - Clone, - Copy, - strum_macros::EnumIter, - strum_macros::Display, -)] -#[strum(serialize_all = "snake_case", ascii_case_insensitive)] -#[serde(try_from = "String", rename_all = "snake_case")] -pub enum SupportedLanguages { - Rust, - Typescript, - Python, - Ruby, - Javascript, -} - -// NOTE: These froms are weird, should be into? Also, should be some way to let either serde or -// strum handle this -impl From<SupportedLanguages> for String { - fn from(val: SupportedLanguages) -> Self { - match val { - SupportedLanguages::Rust => "rust".to_owned(), - SupportedLanguages::Typescript => "typescript".to_owned(), - SupportedLanguages::Javascript => "javascript".to_owned(), - SupportedLanguages::Python => "python".to_owned(), - SupportedLanguages::Ruby => "ruby".to_owned(), - } - } -} - -impl From<SupportedLanguages> for &str { - fn from(val: SupportedLanguages) -> Self { - match val { - SupportedLanguages::Rust => "rust", - SupportedLanguages::Typescript => "typescript", - SupportedLanguages::Javascript => "javascript", - SupportedLanguages::Python => "python", - SupportedLanguages::Ruby => "ruby", - } - } -} - -impl TryFrom<String> for SupportedLanguages { - type Error = strum::ParseError; - fn try_from(value: String) -> Result<Self, Self::Error> { - SupportedLanguages::from_str(&value) - } -} - -static RUST_EXTENSIONS: &[&str] = &["rs"]; -static TYPESCRIPT_EXTENSIONS: &[&str] = &["ts", "tsx", "js", "jsx"]; -static PYTHON_EXTENSIONS: &[&str] = &["py"]; -static RUBY_EXTENSIONS: &[&str] = &["rb"]; -static JAVASCRIPT_EXTENSIONS: &[&str] = &["js", "jsx"]; - -impl SupportedLanguages { - pub fn file_extensions(&self) -> &[&str] { - match self { - SupportedLanguages::Rust => RUST_EXTENSIONS, - SupportedLanguages::Typescript => TYPESCRIPT_EXTENSIONS, - SupportedLanguages::Python => PYTHON_EXTENSIONS, - SupportedLanguages::Ruby => RUBY_EXTENSIONS, - SupportedLanguages::Javascript => JAVASCRIPT_EXTENSIONS, - } - } -} - -#[cfg(test)] -mod test { - use super::*; - use std::str::FromStr; - - #[test] - fn test_supported_languages_into_string() { - assert_eq!(SupportedLanguages::Rust.to_string(), "rust"); - assert_eq!(SupportedLanguages::Typescript.to_string(), "typescript"); - } - - #[test] - fn test_supported_languages_into_str() { - assert_eq!(Into::<&str>::into(SupportedLanguages::Rust), "rust"); - assert_eq!( - Into::<&str>::into(SupportedLanguages::Typescript), - "typescript" - ); - } - - #[test] - fn test_supported_languages_from_str() { - assert_eq!( - SupportedLanguages::from_str("rust"), - Ok(SupportedLanguages::Rust) - ); - assert_eq!( - SupportedLanguages::from_str("typescript"), - Ok(SupportedLanguages::Typescript) - ); - } - - #[test] - fn test_supported_languages_from_str_case_insensitive() { - assert_eq!( - SupportedLanguages::from_str("Rust"), - Ok(SupportedLanguages::Rust) - ); - assert_eq!( - SupportedLanguages::from_str("TypeScript"), - Ok(SupportedLanguages::Typescript) - ); - } -} diff --git a/crates/indexing/Cargo.toml b/crates/indexing/Cargo.toml deleted file mode 100644 index 30aa7bba..00000000 --- a/crates/indexing/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -[package] -name = "indexing" -version = "0.1.0" -edition = "2021" -rust-version.workspace = true - -[dependencies] -ignore = "0.4.22" -code_ops = { path = "../code_ops" } -infrastructure = { path = "../infrastructure" } -anyhow = { workspace = true } -qdrant-client = { workspace = true } -tracing = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -itertools = { workspace = true } -tokio = { workspace = true } -async-stream = "0.3.5" -async-trait = { workspace = true } -futures-util = { workspace = true } -tokio-stream = "0.1.15" -uuid = { workspace = true } -indoc = { workspace = true } -redis = { workspace = true } -text-splitter = { version = "0.13.1", features = ["markdown"] } -chrono = { workspace = true } diff --git a/crates/indexing/src/lib.rs b/crates/indexing/src/lib.rs deleted file mode 100644 index b3c5c4fa..00000000 --- a/crates/indexing/src/lib.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub mod loaders; -pub mod node_caches; -pub mod query; -pub mod storage; -pub mod transformers; - -mod ingestion_node; -mod traits; - -mod ingestion_pipeline; -pub use ingestion_pipeline::IngestionPipeline; diff --git a/crates/indexing/src/node_caches/mod.rs b/crates/indexing/src/node_caches/mod.rs deleted file mode 100644 index ddf3c8c7..00000000 --- a/crates/indexing/src/node_caches/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod redis; -pub use redis::Redis; diff --git a/crates/indexing/src/query.rs b/crates/indexing/src/query.rs deleted file mode 100644 index e498d887..00000000 --- a/crates/indexing/src/query.rs +++ /dev/null @@ -1,75 +0,0 @@ -use anyhow::Result; -use indoc::formatdoc; -use infrastructure::Embed; -use infrastructure::SimplePrompt; -use qdrant_client::qdrant::SearchPoints; - -/// Performs a naive search using qdrant and openai -/// -/// When we add more complicated rag query -/// logic, nice to have a pipeline similar to ingestion and abstract away over the storage. -/// -/// This is just quick and dirty so we can get databuoy out. -#[tracing::instrument( - skip(query, storage_namespace), - fields(query, response), - err, - name = "indexing.query.naieve" -)] -pub async fn naive(query: &str, storage_namespace: &str) -> Result<String> { - let qdrant = infrastructure::create_qdrant_client()?; - let openai = infrastructure::create_openai_client(); - - let embedding_model = infrastructure::DEFAULT_OPENAI_EMBEDDING_MODEL; - - let mut embedded_query = openai - .embed(vec![query.to_string()], embedding_model) - .await?; - - let search_result = qdrant - .search_points(&SearchPoints { - collection_name: storage_namespace.to_string(), - vector: embedded_query - .drain(0..1) - .next() - .ok_or(anyhow::anyhow!("No query vector"))?, - limit: 10, - with_payload: Some(true.into()), - ..Default::default() - }) - .await?; - - let result_context = search_result - .result - .into_iter() - .fold(String::new(), |acc, point| { - point - .payload - .into_iter() - .fold(acc, |acc, (k, v)| format!("{}\n{}: {}", acc, k, v)) - }); - - tracing::Span::current().record("query", query); - - let prompt = formatdoc!( - r#" - Answer the following question(s): - {query} - - ## Constraints - * Only answer based on the provided context below - * Answer the question fully and remember to be concise - - ## Additional information found - {result_context} - "#, - ); - - let response = openai - .prompt(&prompt, infrastructure::DEFAULT_OPENAI_MODEL) - .await?; - - tracing::Span::current().record("response", &response); - - Ok(response) -} diff --git a/crates/indexing/src/storage/mod.rs b/crates/indexing/src/storage/mod.rs deleted file mode 100644 index 30fb0483..00000000 --- a/crates/indexing/src/storage/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod qdrant; - -pub use qdrant::Qdrant; diff --git a/crates/infrastructure/Cargo.toml b/crates/infrastructure/Cargo.toml deleted file mode 100644 index 51144feb..00000000 --- a/crates/infrastructure/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -[package] -name = "infrastructure" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -dotenvy = { workspace = true } -anyhow = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -reqwest = { workspace = true, features = ["json"] } -tokio = { workspace = true } -tokio-util = { workspace = true } -once_cell = { workspace = true } -async-openai = { workspace = true } -async-trait = { workspace = true } -uuid = { workspace = true, features = ["serde"] } -rand = { workspace = true } -strum = { workspace = true } -strum_macros = { workspace = true } - -# tracing and otel -tracing = { workspace = true } -tracing-subscriber = { workspace = true } - -opentelemetry = { workspace = true } -opentelemetry_sdk = { workspace = true } -tracing-opentelemetry = { workspace = true } -opentelemetry-otlp = { workspace = true } -octocrab = { workspace = true } -jsonwebtoken = "9.2.0" -url.workspace = true -itertools = { workspace = true } -base64 = "0.22.0" -qdrant-client = { workspace = true } -opentelemetry-aws = "0.10.0" - -[dev-dependencies] - -[features] -integration_testing = [] -default = [] diff --git a/crates/infrastructure/src/config.rs b/crates/infrastructure/src/config.rs deleted file mode 100644 index ae9adae1..00000000 --- a/crates/infrastructure/src/config.rs +++ /dev/null @@ -1,57 +0,0 @@ -use dotenvy::dotenv; -use std::env; -use std::sync::OnceLock; - -static CONFIG: OnceLock<Config> = OnceLock::new(); - -pub struct Config { - pub port: String, - pub openai_api_key: String, - pub otel_enabled: bool, - pub openai_endpoint: Option<String>, - pub github_app_id: Option<u64>, - pub qdrant_url: Option<String>, - pub qdrant_api_key: Option<String>, - pub redis_url: Option<String>, -} - -impl Config { - pub fn from_env() -> &'static Config { - CONFIG.get_or_init(|| { - tracing::info!("Loading config from environment"); - dotenv().ok(); - - let port = env::var("PORT").expect("PORT env var not set"); - let openai_api_key = - env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY env var not set"); - let otel_enabled = env::var("OTEL_ENABLED") - .expect("OTEL_ENABLED env var not set") - .parse::<bool>() - .expect("OTEL_ENABLED env var must be a boolean"); - let openai_endpoint = env::var("OPENAI_ENDPOINT").ok(); - - let github_app_id: Option<u64> = env::var("GITHUB_APP_ID") - .map(|s| s.parse::<u64>().expect("GITHUB_APP_ID must be a number")) - .ok(); - - let qdrant_url = env::var("QDRANT_URL").ok(); - let qdrant_api_key = env::var("QDRANT_API_KEY").ok(); - let redis_url = env::var("REDIS_URL").ok(); - - Self { - port, - openai_api_key, - otel_enabled, - openai_endpoint, - github_app_id, - qdrant_url, - qdrant_api_key, - redis_url, - } - }) - } - - pub fn otel_enabled(&self) -> bool { - self.otel_enabled - } -} diff --git a/crates/infrastructure/src/lib.rs b/crates/infrastructure/src/lib.rs deleted file mode 100644 index f84afb20..00000000 --- a/crates/infrastructure/src/lib.rs +++ /dev/null @@ -1,165 +0,0 @@ -use std::fmt::Debug; - -use anyhow::{Context as _, Result}; -use async_openai::types::{ - ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs, - CreateChatCompletionRequestArgs, CreateEmbeddingRequestArgs, -}; -use async_trait::async_trait; - -mod config; -pub const DEFAULT_OPENAI_MODEL: &str = "gpt-4o"; -pub const DEFAULT_OPENAI_EMBEDDING_MODEL: &str = "text-embedding-3-small"; - -use qdrant_client::client::QdrantClient; - -// Loads the global config async -pub fn config() -> &'static config::Config { - config::Config::from_env() -} - -pub fn create_openai_client() -> async_openai::Client<async_openai::config::OpenAIConfig> { - let mut openai_config = - async_openai::config::OpenAIConfig::new().with_api_key(&config().openai_api_key); - - // Enables mocking in tests - if let Some(endpoint) = &config().openai_endpoint { - openai_config = openai_config.with_api_base(endpoint); - } else if cfg!(feature = "integration_testing") { - panic!("Openai endpoint not set in testing"); - } - async_openai::Client::with_config(openai_config) -} - -pub fn create_qdrant_client() -> Result<QdrantClient> { - let url = &config() - .qdrant_url - .as_deref() - .ok_or(anyhow::anyhow!("qdrant url missing from config"))?; - - QdrantClient::from_url(url) - .with_api_key(config().qdrant_api_key.clone()) - .build() -} - -#[async_trait] -pub trait SimplePrompt: Debug + Send + Sync { - // Takes a simple prompt, prompts the llm and returns the response - async fn prompt(&self, prompt: &str, model: &str) -> Result<String>; -} - -#[async_trait] -pub trait SimpleCompletion<T> { - async fn complete(&self, messages: Vec<T>, prompt: &str, model: &str) -> Result<String>; -} - -#[async_trait] -pub trait Embed: Debug + Send + Sync { - async fn embed(&self, input: Vec<String>, model: &str) -> Result<Embeddings>; -} - -#[async_trait] -#[allow(clippy::blocks_in_conditions)] -impl SimplePrompt for async_openai::Client<async_openai::config::OpenAIConfig> { - #[tracing::instrument(skip(self), err)] - async fn prompt(&self, prompt: &str, model: &str) -> Result<String> { - let request = CreateChatCompletionRequestArgs::default() - .model(model) - .messages(vec![ChatCompletionRequestUserMessageArgs::default() - .content(prompt) - .build()? - .into()]) - .build()?; - - tracing::debug!( - messages = serde_json::to_string_pretty(&request)?, - "[SimplePrompt] Request to openai" - ); - - let mut response = self.chat().create(request).await?; - - tracing::debug!( - response = serde_json::to_string_pretty(&response)?, - "[SimplePrompt] Response from openai" - ); - - response - .choices - .remove(0) - .message - .content - .take() - .context("Expected content in response") - } -} - -#[async_trait] -#[allow(clippy::blocks_in_conditions)] -impl SimpleCompletion<ChatCompletionRequestMessage> - for async_openai::Client<async_openai::config::OpenAIConfig> -{ - // Takes a simple prompt, prompts the llm and returns the response - #[tracing::instrument(skip(self), err)] - async fn complete( - &self, - messages: Vec<ChatCompletionRequestMessage>, - prompt: &str, - model: &str, - ) -> Result<String> { - let mut messages = messages.to_vec(); - messages.push( - ChatCompletionRequestUserMessageArgs::default() - .content(prompt) - .build()? - .into(), - ); - - let request = CreateChatCompletionRequestArgs::default() - .model(model) - .messages(messages) - .build()?; - - tracing::debug!( - messages = serde_json::to_string_pretty(&request)?, - "[SimpleCompletion] Request to openai" - ); - - let response = self.chat().create(request).await?; - - tracing::debug!( - response = serde_json::to_string_pretty(&response)?, - "[SimpleCompletion] Response from openai" - ); - - Ok(response - .choices - .first() - .unwrap() - .message - .content - .as_ref() - .expect("Expected content in response") - .clone()) - } -} - -type Embeddings = Vec<Vec<f32>>; - -#[async_trait] -impl Embed for async_openai::Client<async_openai::config::OpenAIConfig> { - // WARN: Openai-async clones the input - async fn embed(&self, input: Vec<String>, model: &str) -> Result<Embeddings> { - let request = CreateEmbeddingRequestArgs::default() - .model(model) - .input(input) - .build()?; - tracing::debug!( - messages = serde_json::to_string_pretty(&request)?, - "[Embed] Request to openai" - ); - let response = self.embeddings().create(request).await?; - tracing::debug!("[Embed] Response openai"); - - Ok(response.data.into_iter().map(|d| d.embedding).collect()) - } -} diff --git a/swiftide/Cargo.toml b/swiftide/Cargo.toml new file mode 100644 index 00000000..a1a27959 --- /dev/null +++ b/swiftide/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "swiftide" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { version = "1.0.86", features = ["backtrace"] } +async-stream = "0.3.5" +async-trait = "0.1.80" +chrono = { version = "0.4.38" } +derive_builder = "0.20.0" +futures-util = "0.3.30" +ignore = "0.4.22" +indoc = "2.0.5" +itertools = { version = "0.13.0" } +serde = { version = "1.0.203", features = ["derive"] } +serde_json = "1.0.117" +text-splitter = { version = "0.13.1", features = ["markdown"] } +tokio = { version = "1.38.0", features = ["full"] } +tracing = { version = "0.1.40", features = ["log"] } +strum = "0.26.2" +strum_macros = "0.26.4" + +# Integrations +async-openai = { version = "0.23.2", optional = true } +qdrant-client = { version = "1.9.0", optional = true } +redis = { version = "0.25.4", features = [ + "aio", + "tokio-comp", + "connection-manager", + "tokio-rustls-comp", +], optional = true } +tree-sitter = { version = "0.22.5", optional = true } +tree-sitter-rust = { version = "0.21.0", optional = true } +tree-sitter-python = { version = "0.21.0", optional = true } +tree-sitter-ruby = { version = "0.21.0", optional = true } +tree-sitter-typescript = { version = "0.21.1", optional = true } +tree-sitter-javascript = { version = "0.21.3", optional = true } + +[features] +default = ["all"] +all = ["qdrant", "redis", "tree-sitter", "openai"] +qdrant = ["dep:qdrant-client"] +redis = ["dep:redis"] +tree-sitter = [ + "dep:tree-sitter", + "dep:tree-sitter-rust", + "dep:tree-sitter-python", + "dep:tree-sitter-ruby", + "dep:tree-sitter-typescript", + "dep:tree-sitter-javascript", +] +openai = ["dep:async-openai"] diff --git a/swiftide/src/embeddings.rs b/swiftide/src/embeddings.rs new file mode 100644 index 00000000..d03f7d87 --- /dev/null +++ b/swiftide/src/embeddings.rs @@ -0,0 +1 @@ +pub type Embeddings = Vec<Vec<f32>>; diff --git a/crates/indexing/src/ingestion_node.rs b/swiftide/src/ingestion/ingestion_node.rs similarity index 50% rename from crates/indexing/src/ingestion_node.rs rename to swiftide/src/ingestion/ingestion_node.rs index ef6532c3..b4ec2e4e 100644 --- a/crates/indexing/src/ingestion_node.rs +++ b/swiftide/src/ingestion/ingestion_node.rs @@ -1,15 +1,9 @@ -use anyhow::{Context as _, Result}; use std::{ collections::HashMap, hash::{Hash, Hasher}, path::PathBuf, }; -use qdrant_client::{ - client::Payload, - qdrant::{self, Value}, -}; - #[derive(Debug, Default, Clone)] pub struct IngestionNode { pub id: Option<u64>, @@ -45,34 +39,3 @@ impl Hash for IngestionNode { self.chunk.hash(state); } } - -impl TryInto<qdrant::PointStruct> for IngestionNode { - type Error = anyhow::Error; - - fn try_into(mut self) -> Result<qdrant::PointStruct> { - let id = self.calculate_hash(); - - self.metadata.extend([ - ("path".to_string(), self.path.to_string_lossy().to_string()), - ("content".to_string(), self.chunk), - ( - "last_updated_at".to_string(), - chrono::Utc::now().to_rfc3339(), - ), - ]); - - // Damn who build this api - let payload: Payload = self - .metadata - .iter() - .map(|(k, v)| (k.as_str(), Value::from(v.as_str()))) - .collect::<HashMap<&str, Value>>() - .into(); - - Ok(qdrant::PointStruct::new( - id, - self.vector.context("Vector is not set")?, - payload, - )) - } -} diff --git a/crates/indexing/src/ingestion_pipeline.rs b/swiftide/src/ingestion/ingestion_pipeline.rs similarity index 94% rename from crates/indexing/src/ingestion_pipeline.rs rename to swiftide/src/ingestion/ingestion_pipeline.rs index 799fd7fd..e0f705b4 100644 --- a/crates/indexing/src/ingestion_pipeline.rs +++ b/swiftide/src/ingestion/ingestion_pipeline.rs @@ -1,14 +1,10 @@ -use crate::ingestion_node::IngestionNode; -use crate::traits::{ - BatchableTransformer, ChunkerTransformer, Loader, NodeCache, Storage, Transformer, -}; +use crate::{BatchableTransformer, ChunkerTransformer, Loader, NodeCache, Storage, Transformer}; use anyhow::Result; -use futures_util::{Stream, StreamExt, TryFutureExt, TryStreamExt}; +use futures_util::{StreamExt, TryFutureExt, TryStreamExt}; -use std::pin::Pin; use std::sync::Arc; -pub type IngestionStream = Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>>; +use super::{IngestionNode, IngestionStream}; pub struct IngestionPipeline { stream: IngestionStream, diff --git a/swiftide/src/ingestion/ingestion_stream.rs b/swiftide/src/ingestion/ingestion_stream.rs new file mode 100644 index 00000000..6eb61952 --- /dev/null +++ b/swiftide/src/ingestion/ingestion_stream.rs @@ -0,0 +1,7 @@ +use anyhow::Result; +use futures_util::stream::Stream; +use std::pin::Pin; + +use super::IngestionNode; + +pub type IngestionStream = Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>>; diff --git a/swiftide/src/ingestion/mod.rs b/swiftide/src/ingestion/mod.rs new file mode 100644 index 00000000..c7f2eb05 --- /dev/null +++ b/swiftide/src/ingestion/mod.rs @@ -0,0 +1,7 @@ +mod ingestion_node; +mod ingestion_pipeline; +mod ingestion_stream; + +pub use ingestion_node::*; +pub use ingestion_pipeline::*; +pub use ingestion_stream::*; diff --git a/swiftide/src/integrations/mod.rs b/swiftide/src/integrations/mod.rs new file mode 100644 index 00000000..40695c0c --- /dev/null +++ b/swiftide/src/integrations/mod.rs @@ -0,0 +1,8 @@ +#[cfg(feature = "openai")] +pub mod openai; +#[cfg(feature = "qdrant")] +pub mod qdrant; +#[cfg(feature = "redis")] +pub mod redis; +#[cfg(feature = "tree-sitter")] +pub mod treesitter; diff --git a/swiftide/src/integrations/openai/embed.rs b/swiftide/src/integrations/openai/embed.rs new file mode 100644 index 00000000..91946324 --- /dev/null +++ b/swiftide/src/integrations/openai/embed.rs @@ -0,0 +1,26 @@ +use anyhow::Result; +use async_openai::types::CreateEmbeddingRequestArgs; +use async_trait::async_trait; + +use crate::{Embed, Embeddings}; + +use super::OpenAI; + +#[async_trait] +impl Embed for OpenAI { + async fn embed(&self, input: Vec<String>) -> Result<Embeddings> { + let request = CreateEmbeddingRequestArgs::default() + .model(&self.embed_model) + .input(input) + .build()?; + tracing::debug!( + messages = serde_json::to_string_pretty(&request)?, + "[Embed] Request to openai" + ); + let response = self.client.embeddings().create(request).await?; + tracing::debug!("[Embed] Response openai"); + + // WARN: Naively assumes that the order is preserved. Might not always be the case. + Ok(response.data.into_iter().map(|d| d.embedding).collect()) + } +} diff --git a/swiftide/src/integrations/openai/mod.rs b/swiftide/src/integrations/openai/mod.rs new file mode 100644 index 00000000..3320defc --- /dev/null +++ b/swiftide/src/integrations/openai/mod.rs @@ -0,0 +1,9 @@ +mod embed; +mod simple_prompt; + +#[derive(Debug)] +pub struct OpenAI { + client: async_openai::Client<async_openai::config::OpenAIConfig>, + embed_model: String, + prompt_model: String, +} diff --git a/swiftide/src/integrations/openai/simple_prompt.rs b/swiftide/src/integrations/openai/simple_prompt.rs new file mode 100644 index 00000000..1fee2814 --- /dev/null +++ b/swiftide/src/integrations/openai/simple_prompt.rs @@ -0,0 +1,40 @@ +use crate::SimplePrompt; +use async_openai::types::{ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs}; +use async_trait::async_trait; + +use super::OpenAI; +use anyhow::{Context as _, Result}; + +#[async_trait] +impl SimplePrompt for OpenAI { + #[tracing::instrument(skip(self), err)] + async fn prompt(&self, prompt: &str) -> Result<String> { + let request = CreateChatCompletionRequestArgs::default() + .model(&self.prompt_model) + .messages(vec![ChatCompletionRequestUserMessageArgs::default() + .content(prompt) + .build()? + .into()]) + .build()?; + + tracing::debug!( + messages = serde_json::to_string_pretty(&request)?, + "[SimplePrompt] Request to openai" + ); + + let mut response = self.client.chat().create(request).await?; + + tracing::debug!( + response = serde_json::to_string_pretty(&response)?, + "[SimplePrompt] Response from openai" + ); + + response + .choices + .remove(0) + .message + .content + .take() + .context("Expected content in response") + } +} diff --git a/swiftide/src/integrations/qdrant/ingestion_node.rs b/swiftide/src/integrations/qdrant/ingestion_node.rs new file mode 100644 index 00000000..3cf67a5a --- /dev/null +++ b/swiftide/src/integrations/qdrant/ingestion_node.rs @@ -0,0 +1,39 @@ +use anyhow::{Context as _, Result}; +use std::collections::HashMap; + +use crate::ingestion::IngestionNode; +use qdrant_client::{ + client::Payload, + qdrant::{self, Value}, +}; + +impl TryInto<qdrant::PointStruct> for IngestionNode { + type Error = anyhow::Error; + + fn try_into(mut self) -> Result<qdrant::PointStruct> { + let id = self.calculate_hash(); + + self.metadata.extend([ + ("path".to_string(), self.path.to_string_lossy().to_string()), + ("content".to_string(), self.chunk), + ( + "last_updated_at".to_string(), + chrono::Utc::now().to_rfc3339(), + ), + ]); + + // Damn who build this api + let payload: Payload = self + .metadata + .iter() + .map(|(k, v)| (k.as_str(), Value::from(v.as_str()))) + .collect::<HashMap<&str, Value>>() + .into(); + + Ok(qdrant::PointStruct::new( + id, + self.vector.context("Vector is not set")?, + payload, + )) + } +} diff --git a/crates/indexing/src/storage/qdrant.rs b/swiftide/src/integrations/qdrant/mod.rs similarity index 55% rename from crates/indexing/src/storage/qdrant.rs rename to swiftide/src/integrations/qdrant/mod.rs index 80b014d1..4975a129 100644 --- a/crates/indexing/src/storage/qdrant.rs +++ b/swiftide/src/integrations/qdrant/mod.rs @@ -1,13 +1,10 @@ -#![allow(clippy::blocks_in_conditions)] - +mod ingestion_node; +mod persist; use anyhow::Result; -use async_trait::async_trait; -use qdrant_client::{ - client::QdrantClient, - qdrant::{vectors_config::Config, CreateCollection, Distance, VectorParams, VectorsConfig}, -}; - -use crate::traits::Storage; +use qdrant_client::client::QdrantClient; +use qdrant_client::prelude::*; +use qdrant_client::qdrant::vectors_config::Config; +use qdrant_client::qdrant::{VectorParams, VectorsConfig}; pub struct Qdrant { client: QdrantClient, @@ -63,44 +60,3 @@ impl Qdrant { Ok(()) } } - -#[async_trait] -impl Storage for Qdrant { - fn batch_size(&self) -> Option<usize> { - self.batch_size - } - - #[tracing::instrument(skip_all, err)] - async fn setup(&self) -> Result<()> { - self.create_index_if_not_exists().await - } - - #[tracing::instrument(skip_all, err, name = "storage.qdrant.store")] - async fn store(&self, node: crate::ingestion_node::IngestionNode) -> Result<()> { - self.client - .upsert_points_blocking( - self.collection_name.to_string(), - None, - vec![node.try_into()?], - None, - ) - .await?; - Ok(()) - } - - #[tracing::instrument(skip_all, err, name = "storage.qdrant.batch_store")] - async fn batch_store(&self, nodes: Vec<crate::ingestion_node::IngestionNode>) -> Result<()> { - self.client - .upsert_points_blocking( - self.collection_name.to_string(), - None, - nodes - .into_iter() - .map(TryInto::try_into) - .collect::<Result<Vec<_>>>()?, - None, - ) - .await?; - Ok(()) - } -} diff --git a/swiftide/src/integrations/qdrant/persist.rs b/swiftide/src/integrations/qdrant/persist.rs new file mode 100644 index 00000000..ff81a53d --- /dev/null +++ b/swiftide/src/integrations/qdrant/persist.rs @@ -0,0 +1,47 @@ +use anyhow::Result; +use async_trait::async_trait; + +use crate::traits::Storage; + +use super::Qdrant; + +#[async_trait] +impl Storage for Qdrant { + fn batch_size(&self) -> Option<usize> { + self.batch_size + } + + #[tracing::instrument(skip_all, err)] + async fn setup(&self) -> Result<()> { + self.create_index_if_not_exists().await + } + + #[tracing::instrument(skip_all, err, name = "storage.qdrant.store")] + async fn store(&self, node: crate::ingestion::IngestionNode) -> Result<()> { + self.client + .upsert_points_blocking( + self.collection_name.to_string(), + None, + vec![node.try_into()?], + None, + ) + .await?; + Ok(()) + } + + #[tracing::instrument(skip_all, err, name = "storage.qdrant.batch_store")] + async fn batch_store(&self, nodes: Vec<crate::ingestion::IngestionNode>) -> Result<()> { + self.client + .upsert_points_blocking( + self.collection_name.to_string(), + None, + nodes + .into_iter() + .map(TryInto::try_into) + .collect::<Result<Vec<_>>>()?, + None, + ) + .await?; + Ok(()) + } +} diff --git a/swiftide/src/integrations/redis/mod.rs b/swiftide/src/integrations/redis/mod.rs new file mode 100644 index 00000000..4c50045c --- /dev/null +++ b/swiftide/src/integrations/redis/mod.rs @@ -0,0 +1,3 @@ +mod node_cache; + +pub use node_cache::RedisNodeCache; diff --git a/crates/indexing/src/node_caches/redis.rs b/swiftide/src/integrations/redis/node_cache.rs similarity index 91% rename from crates/indexing/src/node_caches/redis.rs rename to swiftide/src/integrations/redis/node_cache.rs index a057f3e1..e8911045 100644 --- a/crates/indexing/src/node_caches/redis.rs +++ b/swiftide/src/integrations/redis/node_cache.rs @@ -4,18 +4,17 @@ use tokio::sync::RwLock; use anyhow::{Context as _, Result}; use async_trait::async_trait; -use crate::{ingestion_node::IngestionNode, traits::NodeCache}; +use crate::{ingestion::IngestionNode, traits::NodeCache}; -pub struct Redis { +pub struct RedisNodeCache { client: redis::Client, connection_manager: RwLock<Option<redis::aio::ConnectionManager>>, key_prefix: String, } -impl Redis { +impl RedisNodeCache { pub fn try_from_url(url: &str, prefix: &str) -> Result<Self> { let client = redis::Client::open(url).context("Failed to open redis client")?; - // TODO: Add namespace Ok(Self { client, connection_manager: RwLock::new(None), @@ -64,7 +63,7 @@ impl Redis { } // Redis CM does not implement debug -impl Debug for Redis { +impl Debug for RedisNodeCache { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Redis") .field("client", &self.client) @@ -73,7 +72,7 @@ impl Debug for Redis { } #[async_trait] -impl NodeCache for Redis { +impl NodeCache for RedisNodeCache { // false -> not cached, expect node to be processed // true -> cached, expect node to be skipped #[tracing::instrument(skip_all, name = "node_cache.redis.get", fields(hit))] @@ -127,11 +126,9 @@ mod tests { use std::collections::HashMap; #[tokio::test] async fn test_redis_cache() { - let redis_url = infrastructure::config() - .redis_url - .as_deref() - .expect("Expected redis url"); - let cache = Redis::try_from_url(redis_url, "test").expect("Could not build redis client"); + let redis_url = std::env::var("REDIS_URL").expect("REDIS_URL not set"); + let cache = + RedisNodeCache::try_from_url(&redis_url, "test").expect("Could not build redis client"); cache.reset_cache().await; let node = IngestionNode { diff --git a/swiftide/src/integrations/treesitter/mod.rs b/swiftide/src/integrations/treesitter/mod.rs new file mode 100644 index 00000000..2aa4130b --- /dev/null +++ b/swiftide/src/integrations/treesitter/mod.rs @@ -0,0 +1,5 @@ +mod splitter; +mod supported_languages; + +pub use splitter::{ChunkSize, CodeSplitter, CodeSplitterBuilder}; +pub use supported_languages::SupportedLanguages; diff --git a/crates/code_ops/src/code_splitter.rs b/swiftide/src/integrations/treesitter/splitter.rs similarity index 90% rename from crates/code_ops/src/code_splitter.rs rename to swiftide/src/integrations/treesitter/splitter.rs index 6cfb254c..2bcf178c 100644 --- a/crates/code_ops/src/code_splitter.rs +++ b/swiftide/src/integrations/treesitter/splitter.rs @@ -1,14 +1,10 @@ -#![allow(dead_code)] -extern crate tree_sitter; - -use crate::SupportedLanguages; use anyhow::{Context as _, Result}; use std::ops::Range; use tree_sitter::{Node, Parser}; use derive_builder::Builder; -use crate::supported_language_to_tree_sitter; +use super::supported_languages::SupportedLanguages; // TODO: Instead of counting bytes, count tokens with titktoken const DEFAULT_MAX_BYTES: usize = 1500; @@ -21,9 +17,23 @@ pub struct CodeSplitter { /// Maximum size of a chunk in bytes or a range of bytes #[builder(default, setter(into))] chunk_size: ChunkSize, + #[builder(setter(custom))] language: SupportedLanguages, } +impl CodeSplitterBuilder { + pub fn language(mut self, language: impl TryInto<SupportedLanguages>) -> Result<Self> { + self.language = Some( + // For some reason there's a trait conflict, wth + language + .try_into() + .ok() + .context("Treesitter language not supported")?, + ); + Ok(self) + } +} + #[derive(Debug, Clone)] pub enum ChunkSize { Bytes(usize), @@ -92,7 +102,7 @@ impl CodeSplitter { pub fn split(&self, code: &str) -> Result<Vec<String>> { let mut parser = Parser::new(); - parser.set_language(&supported_language_to_tree_sitter(&self.language))?; + parser.set_language(&self.language.into())?; let tree = parser.parse(code, None).context("No nodes found")?; let root_node = tree.root_node(); @@ -160,7 +170,7 @@ mod test { #[test] fn test_max_bytes_limit() { let splitter = CodeSplitter::builder() - .language(SupportedLanguages::Rust) + .language(SupportedLanguages::Rust)? .chunk_size(50) .build() .unwrap(); @@ -187,7 +197,7 @@ mod test { #[test] fn test_empty_text() { let splitter = CodeSplitter::builder() - .language(SupportedLanguages::Rust) + .language(SupportedLanguages::Rust)? .chunk_size(50) .build() .unwrap(); @@ -202,7 +212,7 @@ mod test { #[test] fn test_range_max() { let splitter = CodeSplitter::builder() - .language(SupportedLanguages::Rust) + .language(SupportedLanguages::Rust)? .chunk_size(0..50) .build() .unwrap(); @@ -227,7 +237,7 @@ mod test { #[test] fn test_range_min_and_max() { let splitter = CodeSplitter::builder() - .language(SupportedLanguages::Rust) + .language(SupportedLanguages::Rust)? .chunk_size(20..50) .build() .unwrap(); diff --git a/swiftide/src/integrations/treesitter/supported_languages.rs b/swiftide/src/integrations/treesitter/supported_languages.rs new file mode 100644 index 00000000..5c8cae11 --- /dev/null +++ b/swiftide/src/integrations/treesitter/supported_languages.rs @@ -0,0 +1,70 @@ +// pub use std::str::FromStr as _; + +#[derive(Debug, PartialEq, Clone, Copy, strum_macros::EnumString, strum_macros::Display)] +pub enum SupportedLanguages { + Rust, + Typescript, + Python, + Ruby, + Javascript, +} + +static RUST_EXTENSIONS: &[&str] = &["rs"]; +static TYPESCRIPT_EXTENSIONS: &[&str] = &["ts", "tsx", "js", "jsx"]; +static PYTHON_EXTENSIONS: &[&str] = &["py"]; +static RUBY_EXTENSIONS: &[&str] = &["rb"]; +static JAVASCRIPT_EXTENSIONS: &[&str] = &["js", "jsx"]; + +impl SupportedLanguages { + pub fn file_extensions(&self) -> &[&str] { + match self { + SupportedLanguages::Rust => RUST_EXTENSIONS, + SupportedLanguages::Typescript => TYPESCRIPT_EXTENSIONS, + SupportedLanguages::Python => PYTHON_EXTENSIONS, + SupportedLanguages::Ruby => RUBY_EXTENSIONS, + SupportedLanguages::Javascript => JAVASCRIPT_EXTENSIONS, + } + } +} + +impl From<SupportedLanguages> for tree_sitter::Language { + fn from(val: SupportedLanguages) -> Self { + match val { + SupportedLanguages::Rust => tree_sitter_rust::language(), + SupportedLanguages::Python => tree_sitter_python::language(), + SupportedLanguages::Typescript => tree_sitter_typescript::language_typescript(), + SupportedLanguages::Javascript => tree_sitter_javascript::language(), + SupportedLanguages::Ruby => tree_sitter_ruby::language(), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::str::FromStr; + + #[test] + fn test_supported_languages_from_str() { + assert_eq!( + SupportedLanguages::from_str("rust"), + Ok(SupportedLanguages::Rust) + ); + assert_eq!( + SupportedLanguages::from_str("typescript"), + Ok(SupportedLanguages::Typescript) + ); + } + + #[test] + fn test_supported_languages_from_str_case_insensitive() { + assert_eq!( + SupportedLanguages::from_str("Rust"), + Ok(SupportedLanguages::Rust) + ); + assert_eq!( + SupportedLanguages::from_str("TypeScript"), + Ok(SupportedLanguages::Typescript) + ); + } +} diff --git a/swiftide/src/lib.rs b/swiftide/src/lib.rs new file mode 100644 index 00000000..aa371110 --- /dev/null +++ b/swiftide/src/lib.rs @@ -0,0 +1,9 @@ +pub mod embeddings; +pub mod ingestion; +pub mod integrations; +pub mod loaders; +pub mod traits; +pub mod transformers; + +pub use embeddings::*; +pub use traits::*; diff --git a/crates/indexing/src/loaders/file_loader.rs b/swiftide/src/loaders/file_loader.rs similarity index 96% rename from crates/indexing/src/loaders/file_loader.rs rename to swiftide/src/loaders/file_loader.rs index 190d759c..49f648ad 100644 --- a/crates/indexing/src/loaders/file_loader.rs +++ b/swiftide/src/loaders/file_loader.rs @@ -1,8 +1,7 @@ +use crate::{ingestion::IngestionNode, ingestion::IngestionStream, Loader}; use futures_util::{stream, StreamExt}; use std::path::PathBuf; -use crate::{ingestion_node::IngestionNode, ingestion_pipeline::IngestionStream, traits::Loader}; - pub struct FileLoader { pub(crate) path: PathBuf, pub(crate) extensions: Vec<String>, diff --git a/crates/indexing/src/loaders/mod.rs b/swiftide/src/loaders/mod.rs similarity index 100% rename from crates/indexing/src/loaders/mod.rs rename to swiftide/src/loaders/mod.rs diff --git a/crates/indexing/src/traits.rs b/swiftide/src/traits.rs similarity index 78% rename from crates/indexing/src/traits.rs rename to swiftide/src/traits.rs index 281a27dd..4acac54c 100644 --- a/crates/indexing/src/traits.rs +++ b/swiftide/src/traits.rs @@ -1,6 +1,6 @@ use std::fmt::Debug; -use crate::{ingestion_node::IngestionNode, ingestion_pipeline::IngestionStream}; +use crate::{ingestion::IngestionNode, ingestion::IngestionStream, Embeddings}; use anyhow::Result; use async_trait::async_trait; @@ -30,6 +30,27 @@ pub trait ChunkerTransformer: Send + Sync + Debug { async fn transform_node(&self, node: IngestionNode) -> IngestionStream; } +#[async_trait] +/// Caches nodes, typically by their path and hash +/// Recommended to namespace on the storage +/// +/// For now just bool return value for easy filter +pub trait NodeCache: Send + Sync + Debug { + async fn get(&self, node: &IngestionNode) -> bool; + async fn set(&self, node: &IngestionNode); +} + +#[async_trait] +pub trait Embed: Debug + Send + Sync { + async fn embed(&self, input: Vec<String>) -> Result<Embeddings>; +} + +#[async_trait] +pub trait SimplePrompt: Debug + Send + Sync { + // Takes a simple prompt, prompts the llm and returns the response + async fn prompt(&self, prompt: &str) -> Result<String>; +} + #[async_trait] /// Persists nodes pub trait Storage: Send + Sync { @@ -40,13 +61,3 @@ pub trait Storage: Send + Sync { None } } - -#[async_trait] -/// Caches nodes, typically by their path and hash -/// Recommended to namespace on the storage -/// -/// For now just bool return value for easy filter -pub trait NodeCache: Send + Sync + Debug { - async fn get(&self, node: &IngestionNode) -> bool; - async fn set(&self, node: &IngestionNode); -} diff --git a/crates/indexing/src/transformers/chunk_code.rs b/swiftide/src/transformers/chunk_code.rs similarity index 72% rename from crates/indexing/src/transformers/chunk_code.rs rename to swiftide/src/transformers/chunk_code.rs index 38317024..684cdadf 100644 --- a/crates/indexing/src/transformers/chunk_code.rs +++ b/swiftide/src/transformers/chunk_code.rs @@ -1,38 +1,39 @@ -use crate::{ - ingestion_node::IngestionNode, ingestion_pipeline::IngestionStream, traits::ChunkerTransformer, -}; +use anyhow::Result; use async_trait::async_trait; -use code_ops::{ChunkSize, CodeSplitter, SupportedLanguages}; use futures_util::{stream, StreamExt}; +use crate::{ + ingestion::{IngestionNode, IngestionStream}, + integrations::treesitter::{ChunkSize, CodeSplitter, SupportedLanguages}, + ChunkerTransformer, +}; + #[derive(Debug)] pub struct ChunkCode { chunker: CodeSplitter, } impl ChunkCode { - pub fn for_language(lang: impl Into<SupportedLanguages>) -> Self { - let lang = lang.into(); - Self { + pub fn for_language(lang: impl TryInto<SupportedLanguages>) -> Result<Self> { + Ok(Self { chunker: CodeSplitter::builder() - .language(lang) + .language(lang)? .build() .expect("Failed to build code splitter"), - } + }) } pub fn for_language_and_chunk_size( lang: impl Into<SupportedLanguages>, chunk_size: impl Into<ChunkSize>, - ) -> Self { - let lang = lang.into(); - Self { + ) -> Result<Self> { + Ok(Self { chunker: CodeSplitter::builder() - .language(lang) - .chunk_size(chunk_size.into()) + .language(lang)? + .chunk_size(chunk_size) .build() .expect("Failed to build code splitter"), - } + }) } } diff --git a/crates/indexing/src/transformers/chunk_markdown.rs b/swiftide/src/transformers/chunk_markdown.rs similarity index 90% rename from crates/indexing/src/transformers/chunk_markdown.rs rename to swiftide/src/transformers/chunk_markdown.rs index a31e7b98..f74fcdfc 100644 --- a/crates/indexing/src/transformers/chunk_markdown.rs +++ b/swiftide/src/transformers/chunk_markdown.rs @@ -1,6 +1,4 @@ -use crate::{ - ingestion_node::IngestionNode, ingestion_pipeline::IngestionStream, traits::ChunkerTransformer, -}; +use crate::{ingestion::IngestionNode, ingestion::IngestionStream, ChunkerTransformer}; use async_trait::async_trait; use futures_util::{stream, StreamExt}; use text_splitter::{Characters, MarkdownSplitter}; diff --git a/crates/indexing/src/transformers/metadata_qa_code.rs b/swiftide/src/transformers/metadata_qa_code.rs similarity index 86% rename from crates/indexing/src/transformers/metadata_qa_code.rs rename to swiftide/src/transformers/metadata_qa_code.rs index 4bb1baf6..46a9ac82 100644 --- a/crates/indexing/src/transformers/metadata_qa_code.rs +++ b/swiftide/src/transformers/metadata_qa_code.rs @@ -1,11 +1,9 @@ use std::sync::Arc; +use crate::{ingestion::IngestionNode, SimplePrompt, Transformer}; use anyhow::Result; use async_trait::async_trait; use indoc::indoc; -use infrastructure::SimplePrompt; - -use crate::{ingestion_node::IngestionNode, traits::Transformer}; #[derive(Debug)] pub struct MetadataQACode { @@ -15,9 +13,9 @@ pub struct MetadataQACode { } impl MetadataQACode { - pub fn new(client: Arc<dyn SimplePrompt>) -> Self { + pub fn new(client: impl SimplePrompt + 'static) -> Self { Self { - client, + client: Arc::new(client), prompt: default_prompt(), num_questions: 5, } @@ -70,10 +68,7 @@ impl Transformer for MetadataQACode { .replace("{questions}", &self.num_questions.to_string()) .replace("{code}", &node.chunk); - let response = self - .client - .prompt(&prompt, infrastructure::DEFAULT_OPENAI_MODEL) - .await?; + let response = self.client.prompt(&prompt).await?; node.metadata .insert("Questions and Answers".to_string(), response); diff --git a/crates/indexing/src/transformers/metadata_qa_text.rs b/swiftide/src/transformers/metadata_qa_text.rs similarity index 85% rename from crates/indexing/src/transformers/metadata_qa_text.rs rename to swiftide/src/transformers/metadata_qa_text.rs index c026c691..6fcb207e 100644 --- a/crates/indexing/src/transformers/metadata_qa_text.rs +++ b/swiftide/src/transformers/metadata_qa_text.rs @@ -1,11 +1,9 @@ use std::sync::Arc; +use crate::{ingestion::IngestionNode, SimplePrompt, Transformer}; use anyhow::Result; use async_trait::async_trait; use indoc::indoc; -use infrastructure::SimplePrompt; - -use crate::{ingestion_node::IngestionNode, traits::Transformer}; #[derive(Debug)] pub struct MetadataQAText { @@ -15,9 +13,9 @@ pub struct MetadataQAText { } impl MetadataQAText { - pub fn new(client: Arc<dyn SimplePrompt>) -> Self { + pub fn new(client: impl SimplePrompt + 'static) -> Self { Self { - client, + client: Arc::new(client), prompt: default_prompt(), num_questions: 5, } @@ -68,10 +66,7 @@ impl Transformer for MetadataQAText { .replace("{questions}", &self.num_questions.to_string()) .replace("{text}", &node.chunk); - let response = self - .client - .prompt(&prompt, infrastructure::DEFAULT_OPENAI_MODEL) - .await?; + let response = self.client.prompt(&prompt).await?; node.metadata .insert("Questions and Answers".to_string(), response); diff --git a/crates/indexing/src/transformers/mod.rs b/swiftide/src/transformers/mod.rs similarity index 100% rename from crates/indexing/src/transformers/mod.rs rename to swiftide/src/transformers/mod.rs diff --git a/crates/indexing/src/transformers/openai_embed.rs b/swiftide/src/transformers/openai_embed.rs similarity index 79% rename from crates/indexing/src/transformers/openai_embed.rs rename to swiftide/src/transformers/openai_embed.rs index c5ada0b5..28d24e7e 100644 --- a/crates/indexing/src/transformers/openai_embed.rs +++ b/swiftide/src/transformers/openai_embed.rs @@ -1,30 +1,23 @@ use std::sync::Arc; +use crate::{ingestion::IngestionNode, ingestion::IngestionStream, BatchableTransformer, Embed}; use anyhow::Result; use async_trait::async_trait; use futures_util::{stream, StreamExt}; -use infrastructure::Embed; - -use crate::{ - ingestion_node::IngestionNode, ingestion_pipeline::IngestionStream, - traits::BatchableTransformer, -}; #[derive(Debug)] -// TODO: Would be nice if the embedding model encapsulates the token limit +// TODO: Would be nice if the embedding model encapsulates the token limit / dimensions pub struct EmbeddingModel(String); #[derive(Debug)] pub struct OpenAIEmbed { client: Arc<dyn Embed>, - model: EmbeddingModel, } impl OpenAIEmbed { - pub fn new(model: impl Into<EmbeddingModel>, client: Arc<dyn Embed>) -> Self { + pub fn new(client: impl Embed + 'static) -> Self { Self { - client, - model: model.into(), + client: Arc::new(client), } } } @@ -38,7 +31,7 @@ impl BatchableTransformer for OpenAIEmbed { stream::iter( self.client - .embed(chunks_to_embed, &self.model.to_string()) + .embed(chunks_to_embed) .await .map(|embeddings| { nodes