Skip to content

Commit

Permalink
WIP
Browse files (browse the repository at this point in the history)
  • Loading branch information
Coniferish committed Dec 27, 2023
1 parent 41909ef commit 415a631
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions prepline_general/api/general.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,7 +109,7 @@ def get_pdf_splits(pdf_pages: List[PageObject], split_size: int = 1):
for page in pdf_pages[offset:end]:
new_pdf.add_page(page)

new_pdf.write(pdf_buffer) # type: ignore
new_pdf.write(pdf_buffer) # type: ignore
pdf_buffer.seek(0)

yield (pdf_buffer.read(), offset)
Expand Down Expand Up @@ -196,7 +196,7 @@ def partition_pdf_splits(
request: Request,
pdf_pages: List[PageObject],
file: IO[bytes],
metadata_filename: str,
metadata_filename: Optional[str],
content_type: str,
coordinates: bool,
**partition_kwargs: Dict[str, Any],
Expand Down Expand Up @@ -265,7 +265,7 @@ def __enter__(self):

Is_Chipper_Processing = True

def __exit__(self, exc_type, exc_value, exc_tb):
def __exit__(self, exc_type, exc_value, exc_tb): # type: ignore
global Is_Chipper_Processing
Is_Chipper_Processing = False

Expand Down Expand Up @@ -500,7 +500,7 @@ def pipeline_api(
# Clean up returned elements
# Note(austin): pydantic should control this sort of thing for us
for i, element in enumerate(elements):
elements[i].metadata.filename = os.path.basename(filename)
elements[i].metadata.filename = os.path.basename(filename) # type: ignore

if not show_coordinates and element.metadata.coordinates:
elements[i].metadata.coordinates = None
Expand Down Expand Up @@ -591,15 +591,14 @@ def _validate_chunking_strategy(m_chunking_strategy: List[str]) -> Union[str, No
return chunking_strategy


def _set_pdf_infer_table_structure(m_pdf_infer_table_structure: List[str], strategy: str):
def _set_pdf_infer_table_structure(m_pdf_infer_table_structure: List[str], strategy: str) -> bool:
pdf_infer_table_structure = (
m_pdf_infer_table_structure[0] if len(m_pdf_infer_table_structure) else "false"
).lower()
if strategy == "hi_res" and pdf_infer_table_structure == "true":
pdf_infer_table_structure = True
return True
else:
pdf_infer_table_structure = False
return pdf_infer_table_structure
return False


def get_validated_mimetype(file: UploadFile):
Expand Down Expand Up @@ -635,7 +634,12 @@ def get_validated_mimetype(file: UploadFile):
class MultipartMixedResponse(StreamingResponse):
CRLF = b"\r\n"

def __init__(self, *args: Any, content_type: Union[str, None] = None, **kwargs: Dict[str, Any]):
def __init__(
self,
*args: Any,
content_type: Union[str, None] = None,
**kwargs,
):
super().__init__(*args, **kwargs)
self.content_type = content_type

Expand Down Expand Up @@ -821,11 +825,11 @@ def response_generator(is_multipart: bool):
def join_responses(responses: List[Any]):
if media_type != "text/csv":
return responses
data = pd.read_csv(io.BytesIO(responses[0].body))
data = pd.read_csv(io.BytesIO(responses[0].body)) # type: ignore
if len(responses) > 1:
for resp in responses[1:]:
resp_data = pd.read_csv(io.BytesIO(resp.body))
data = data.merge(resp_data, how="outer")
resp_data = pd.read_csv(io.BytesIO(resp.body)) # type: ignore
data = data.merge(resp_data, how="outer") # type: ignore
return PlainTextResponse(data.to_csv())

if content_type == "multipart/mixed":
Expand Down

0 comments on commit 415a631

Please sign in to comment.