Skip to content

Commit

Permalink
Merge pull request #66 from alkem-io/develop
Browse files Browse the repository at this point in the history
Release v0.11.0
  • Loading branch information
valeksiev authored Jan 23, 2025
2 parents 0b10dfb + 1a6c2fa commit b010f4a
Show file tree
Hide file tree
Showing 27 changed files with 6,256 additions and 3,111 deletions.
4 changes: 2 additions & 2 deletions .env.default
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ AUTH_ADMIN_PASSWORD=master-password
VECTOR_DB_HOST=localhost
VECTOR_DB_PORT=8765

CHUNK_SIZE=1000
CHUNK_OVERLAP=100
CHUNK_SIZE=9000
CHUNK_OVERLAP=500

SUMMARY_LENGTH=10000

Expand Down
50 changes: 50 additions & 0 deletions graphql/fragments/callout.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Fields selected for a callout: its identity and visibility, the comment
# thread, the framing profile, and the post / link contributions.
fragment CalloutFields on Callout {
  id
  nameID
  type
  visibility

  # Comment thread: total count plus every message with sender and timestamp.
  comments {
    messagesCount
    messages {
      # The same two profile fields are selected for each sender type below.
      sender {
        ... on User {
          profile {
            url
            displayName
          }
        }
        ... on VirtualContributor {
          profile {
            url
            displayName
          }
        }
      }
      message
      timestamp
    }
  }

  framing {
    id
    profile {
      ...ProfileFields
    }
  }

  contributions {
    post {
      id
      nameID
      profile {
        ...ProfileFields
      }
    }
    # Links spread ProfileNoTagsetFields rather than ProfileFields.
    link {
      id
      uri
      profile {
        ...ProfileNoTagsetFields
      }
    }
  }
}
6 changes: 6 additions & 0 deletions graphql/fragments/profile.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ fragment ProfileFields on Profile {
tagline
url
type
location {
city
country
postalCode
}

tagset {
tags
}
Expand Down
73 changes: 4 additions & 69 deletions graphql/fragments/space-ingest.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,7 @@ fragment SpaceIngest on Space {
nameID
type
profile {
description
displayName
tagline
url
location {
city
country
postalCode
}
tagset {
tags
}
references {
description
name
uri
}
visuals {
uri
name
}
...ProfileFields
}
context {
vision
Expand All @@ -32,54 +12,9 @@ fragment SpaceIngest on Space {
}

collaboration {
callouts {
id
nameID
type
visibility
comments {
messagesCount
messages {
sender {
... on User {
profile {
url
displayName
}
}
... on VirtualContributor {
profile {
url
displayName
}
}
}
message
timestamp
}
}
framing {
id
profile {
...ProfileFields
}
}
contributions {
post {
id
nameID
profile {
...ProfileFields
}
}
link {
id
uri
profile {
...ProfileNoTagsetFields
}

}
calloutsSet {
callouts {
...CalloutFields
}
}
}
Expand Down
15 changes: 15 additions & 0 deletions graphql/queries/knowledge-base-ingest.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Fetches one knowledge base by ID through `lookup`, selecting its profile
# and the callouts of its callouts set (via the CalloutFields fragment).
query knowledgeBaseIngest($knowledgeBaseID: UUID!) {
  lookup {
    knowledgeBase(ID: $knowledgeBaseID) {
      id
      profile {
        ...ProfileFields
      }
      calloutsSet {
        callouts {
          ...CalloutFields
        }
      }
    }
  }
}
15 changes: 8 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@alkemio/space-ingest",
"version": "0.10.0",
"name": "@alkemio/ingest-alkemio-data",
"version": "0.11.0",
"description": "",
"author": "Alkemio Foundation",
"private": true,
Expand Down Expand Up @@ -59,6 +59,7 @@
"graphql": "^16.6.0",
"graphql-upload": "^16.0.1",
"langchain": "^0.2.2",
"langsmith": "^0.1.66",
"mammoth": "^1.7.2",
"officeparser": "^4.1.1",
"pdf-parse": "^1.1.1",
Expand Down
2 changes: 2 additions & 0 deletions src/callout.handlers/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const baseHandler = async (

const result: Document[] = [
new Document({
id: documentId,
pageContent,
metadata: {
documentId,
Expand All @@ -63,6 +64,7 @@ export const baseHandler = async (
generateDocument(docLike);
result.push(
new Document({
id: documentId,
pageContent,
metadata: {
documentId,
Expand Down
2 changes: 1 addition & 1 deletion src/callout.handlers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Callout, CalloutType } from '../generated/graphql';
import { Document } from 'langchain/document';
import { baseHandler } from './base';
import { linkCollectionHandler } from './link.collection';
import { AlkemioCliClient } from 'src/graphql.client/AlkemioCliClient';
import { AlkemioCliClient } from '../graphql.client/AlkemioCliClient';

const handlersMap: Record<
CalloutType,
Expand Down
41 changes: 41 additions & 0 deletions src/data.readers/knowledge.base.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import generateDocument from '../generate.document';
import { Document } from 'langchain/document';
import { IngestBodyOfKnowledge } from '../event.bus/events/ingest.body.of.knowledge';
import { AlkemioCliClient } from '../graphql.client/AlkemioCliClient';
import { processCallouts } from '../process.callouts';
import { Callout } from '../generated/graphql';

/**
 * Reads a knowledge base and converts it into a list of documents.
 *
 * The knowledge base identified by `event.bodyOfKnowledgeId` is fetched with
 * `alkemioClient.ingestKnowledgeBase`. The first document in the result is
 * generated from the knowledge base itself (via `generateDocument`); it is
 * followed by the documents `processCallouts` returns for the callouts of the
 * knowledge base's callouts set.
 *
 * @param event - ingest event; only `bodyOfKnowledgeId` is read here
 * @param alkemioClient - client used for the fetch and handed on to `processCallouts`
 * @returns the fetched knowledge base as `bodyOfKnowledge` together with `documents`
 */
export const embedKnowledgeBase = async (
  event: IngestBodyOfKnowledge,
  alkemioClient: AlkemioCliClient
) => {
  // make sure the service user has sufficient privileges
  const knowledgeBase = await alkemioClient.ingestKnowledgeBase(
    event.bodyOfKnowledgeId
  );

  // Document describing the knowledge base itself; always the first entry.
  const generated = generateDocument(knowledgeBase);
  const knowledgeBaseDocument = new Document({
    id: generated.documentId,
    pageContent: generated.pageContent,
    metadata: {
      documentId: generated.documentId,
      source: generated.source,
      type: generated.type,
      title: generated.title,
    },
  });

  // A missing callouts set is treated as "no callouts".
  const callouts = (knowledgeBase.calloutsSet?.callouts ||
    []) as Partial<Callout>[];
  const calloutDocuments = await processCallouts(callouts, alkemioClient);

  const documents: Document[] = [knowledgeBaseDocument, ...calloutDocuments];
  return { bodyOfKnowledge: knowledgeBase, documents };
};
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { Document } from 'langchain/document';

import { CalloutVisibility, Callout, Space } from '../generated/graphql';
import { Space } from '../generated/graphql';

import logger from '../logger';
import generateDocument from '../generate.document';
import { handleCallout } from '../callout.handlers';
import { AlkemioCliClient } from '../graphql.client/AlkemioCliClient';
import { processCallouts } from '../process.callouts';

// recursive function
// first invocation is with [rootSpace]
// second invocation is with rootSpace.subspaces
Expand All @@ -21,6 +21,7 @@ export const processSpaceTree = async (
generateDocument(subspace);
documents.push(
new Document({
id: documentId,
pageContent,
metadata: {
documentId,
Expand All @@ -30,21 +31,11 @@ export const processSpaceTree = async (
},
})
);

for (let j = 0; j < (subspace.collaboration?.callouts || []).length; j++) {
const callout = (subspace.collaboration?.callouts || [])[j];
if (callout && callout.visibility === CalloutVisibility.Published) {
const document = await handleCallout(
callout as Partial<Callout>,
logger,
alkemioClient
);
// empty doc - nothing to do here
if (document) {
documents.push(...document);
}
}
}
const calloutDocs = await processCallouts(
subspace.collaboration?.calloutsSet?.callouts || [],
alkemioClient
);
documents.push(...calloutDocs);

// invoke recursively for the subspaces of the rootSpace
const subspacesDocs = await processSpaceTree(
Expand Down
23 changes: 23 additions & 0 deletions src/data.readers/space.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { Document } from 'langchain/document';

import { Space } from '../generated/graphql';

import { AlkemioCliClient } from '../graphql.client/AlkemioCliClient';
import { processSpaceTree } from './process.space.tree';
// Relative path instead of 'src/...': resolution must not depend on a
// baseUrl / path-mapping being configured for the compiler and the runtime.
import { IngestBodyOfKnowledge } from '../event.bus/events/ingest.body.of.knowledge';
import { ReadResult } from './types';

/**
 * Reads a space and converts it into a list of documents.
 *
 * The space identified by `event.bodyOfKnowledgeId` is fetched with
 * `alkemioClient.ingestSpace` and handed to `processSpaceTree`, which
 * produces the documents.
 *
 * @param event - ingest event; only `bodyOfKnowledgeId` is read here
 * @param alkemioClient - client used for the fetch and handed on to `processSpaceTree`
 * @returns the fetched space as `bodyOfKnowledge` together with `documents`
 */
export const embedSpace = async (
  event: IngestBodyOfKnowledge,
  alkemioClient: AlkemioCliClient
): Promise<ReadResult> => {
  const spaceId = event.bodyOfKnowledgeId;
  // make sure the service user has sufficient privileges
  const space = await alkemioClient.ingestSpace(spaceId);
  // processSpaceTree takes a list of spaces; the fetched space is its only entry
  const documents: Document[] = await processSpaceTree(
    [space as Partial<Space>],
    alkemioClient
  );

  return { bodyOfKnowledge: space, documents };
};
9 changes: 9 additions & 0 deletions src/data.readers/types.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Without this import `Document` below would resolve to the global DOM
// `Document` (or to nothing when the DOM lib is not loaded) instead of the
// langchain document type the readers actually return.
import { Document } from 'langchain/document';

/** Minimal shape of a body of knowledge as exposed by a reader. */
export declare type BodyOfKnowledgeReadResult = {
  id: string;
  profile: { displayName: string; url: string };
};

/**
 * Result of reading a body of knowledge: the documents produced from it and
 * the body of knowledge itself. Both properties are optional.
 */
export declare type ReadResult = {
  documents?: Document[];
  bodyOfKnowledge?: BodyOfKnowledgeReadResult;
};
Loading

0 comments on commit b010f4a

Please sign in to comment.