Skip to content

Commit

Permalink
Allow using shared extractors (#54)
Browse files Browse the repository at this point in the history
Allow using shared extractors.

This won't work with the current version of the backend because cookies are not
being passed.
  • Loading branch information
eyurtsev authored Mar 20, 2024
1 parent 26669d8 commit 6e80282
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 16 deletions.
56 changes: 55 additions & 1 deletion backend/server/api/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlalchemy.orm import Session
from typing_extensions import Annotated

from db.models import Extractor, get_session
from db.models import Extractor, SharedExtractors, get_session
from extraction.parsing import parse_binary_input
from server.extraction_runnable import ExtractResponse, extract_entire_document
from server.models import DEFAULT_MODEL
Expand Down Expand Up @@ -61,3 +61,57 @@ async def extract_using_existing_extractor(
raise ValueError(
f"Invalid mode {mode}. Expected one of 'entire_document', 'retrieval'."
)


@router.post("/shared", response_model=ExtractResponse)
async def extract_using_shared_extractor(
    *,
    extractor_id: Annotated[UUID, Form()],
    text: Optional[str] = Form(None),
    mode: Literal["entire_document", "retrieval"] = Form("entire_document"),
    file: Optional[UploadFile] = File(None),
    model_name: Optional[str] = Form("default"),
    session: Session = Depends(get_session),
) -> ExtractResponse:
    """Run an extraction using an extractor that was shared via a share token.

    Args:
        extractor_id: The UUID of the shared extractor.
            This is the UUID that is used to share the extractor (the share
            token), not the UUID of the extractor itself.
        text: The text to extract from. When non-empty it takes precedence
            over ``file``.
        mode: The mode to use for extraction.
        file: The file to extract from. Only used when ``text`` is missing
            or empty.
        model_name: The model to use for extraction.
        session: The database session.

    Returns:
        The extraction response produced by the selected extraction mode.

    Raises:
        HTTPException: 422 if neither non-empty text nor a file is provided;
            404 if no extractor is associated with the share token.
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Use a truthiness check on `text` so this guard agrees with the
    # `if text:` branch below. An empty string with no file would otherwise
    # fall through to `file.file` with `file` being None and crash.
    if not text and file is None:
        raise HTTPException(status_code=422, detail="No text or file provided.")

    # Resolve the share token to the underlying extractor.
    extractor = (
        session.query(Extractor)
        .join(SharedExtractors, Extractor.uuid == SharedExtractors.extractor_id)
        .filter(SharedExtractors.share_token == extractor_id)
        .scalar()
    )

    if not extractor:
        raise HTTPException(status_code=404, detail="Extractor not found.")

    if text:
        text_ = text
    else:
        # The guard above guarantees `file` is not None on this path.
        documents = parse_binary_input(file.file)
        # TODO: Add metadata like location from original file where
        # the text was extracted from
        text_ = "\n".join([document.page_content for document in documents])

    if mode == "entire_document":
        return await extract_entire_document(text_, extractor, model_name)
    elif mode == "retrieval":
        return await extract_from_content(text_, extractor, model_name)
    else:
        raise ValueError(
            f"Invalid mode {mode}. Expected one of 'entire_document', 'retrieval'."
        )
12 changes: 4 additions & 8 deletions frontend/app/components/Playground.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export const Playground = (props: PlaygroundProps) => {
Object.assign(request, { file: event.currentTarget.file.files[0] });
}

mutate(request);
mutate([request, isShared]);
};

const handleChange = (event: React.FormEvent<HTMLFormElement>) => {
Expand Down Expand Up @@ -81,13 +81,9 @@ export const Playground = (props: PlaygroundProps) => {
className="textarea textarea-bordered h-3/4"
autoFocus
/>
{isShared ? (
<div>Extraction using shared extractor is not supported yet</div>
) : (
<Button type="submit" disabled={isDisabled}>
Run
</Button>
)}
<Button type="submit" disabled={isDisabled}>
Run
</Button>
</form>
</div>
<div className="m-auto">
Expand Down
20 changes: 15 additions & 5 deletions frontend/app/utils/api.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ type GetExtractorQueryKey = [string, string, boolean]; // [queryKey, uuid, isSha

type OnSuccessFn = (data: { uuid: string }) => void;

axios.defaults.withCredentials = true;

const getExtractor = async ({
queryKey,
}: QueryFunctionContext<GetExtractorQueryKey>): Promise<ExtractorData> => {
Expand Down Expand Up @@ -66,13 +68,21 @@ export const suggestExtractor = async ({
return response.data;
};

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const runExtraction: MutationFunction<any, any> = async (
extractionRequest,
) => {
type ExtractionRequest = {
extractor_id: string;
text?: string;
file?: File;
};

export const runExtraction: MutationFunction<
// eslint-disable-next-line @typescript-eslint/no-explicit-any
any,
[ExtractionRequest, boolean]
> = async ([extractionRequest, isShared]) => {
const endpoint = isShared ? "extract/shared" : "extract";
const baseUrl = getBaseApiUrl();
const response = await axios.postForm(
`${baseUrl}/extract`,
`${baseUrl}/${endpoint}`,
extractionRequest,
);
return response.data;
Expand Down
4 changes: 2 additions & 2 deletions frontend/middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { v4 as uuidv4 } from "uuid";
// Automatically set a 'user_id' cookie if one doesn't exist
// This function will run on every page request, so we can
// guarantee that a 'user_id' cookie will always be set.

const USER_ID_COOKIE_KEY = "user_id";
/* TODO: Change this to 'user_id' */
const USER_ID_COOKIE_KEY = "owner_id";

export function middleware(request: NextRequest) {
const response = NextResponse.next();
Expand Down

0 comments on commit 6e80282

Please sign in to comment.