Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added deprecation warnings for mlm_* arguments. #101

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import zipfile
from typing import Any, Dict, List, Optional, Union
from urllib.parse import parse_qs, quote, unquote, urlparse, urlunparse
from warnings import catch_warnings
from warnings import warn, resetwarnings, catch_warnings

import mammoth
import markdownify
Expand Down Expand Up @@ -44,6 +44,8 @@
IS_AUDIO_TRANSCRIPTION_CAPABLE = True
except ModuleNotFoundError:
pass
finally:
resetwarnings()

# Optional YouTube transcription support
try:
Expand Down Expand Up @@ -1008,14 +1010,46 @@ def __init__(
self,
requests_session: Optional[requests.Session] = None,
llm_client: Optional[Any] = None,
llm_model: Optional[Any] = None,
llm_model: Optional[str] = None,
style_map: Optional[str] = None,
# Deprecated
mlm_client: Optional[Any] = None,
mlm_model: Optional[str] = None,
):
if requests_session is None:
self._requests_session = requests.Session()
else:
self._requests_session = requests_session

# Handle deprecation notices
#############################
if mlm_client is not None:
if llm_client is None:
warn(
"'mlm_client' is deprecated, and was renamed 'llm_client'.",
DeprecationWarning,
)
llm_client = mlm_client
mlm_client = None
else:
raise ValueError(
"'mlm_client' is deprecated, and was renamed 'llm_client'. Do not use both at the same time. Just use 'llm_client' instead."
)

if mlm_model is not None:
if llm_model is None:
warn(
"'mlm_model' is deprecated, and was renamed 'llm_model'.",
DeprecationWarning,
)
llm_model = mlm_model
mlm_model = None
else:
raise ValueError(
"'mlm_model' is deprecated, and was renamed 'llm_model'. Do not use both at the same time. Just use 'llm_model' instead."
)
#############################

self._llm_client = llm_client
self._llm_model = llm_model
self._style_map = style_map
Expand Down
37 changes: 37 additions & 0 deletions tests/test_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import pytest
import requests

from warnings import catch_warnings, resetwarnings

from markitdown import MarkItDown

skip_remote = (
Expand Down Expand Up @@ -243,6 +245,40 @@ def test_markitdown_exiftool() -> None:
assert target in result.text_content


def test_markitdown_deprecation() -> None:
try:
with catch_warnings(record=True) as w:
test_client = object()
markitdown = MarkItDown(mlm_client=test_client)
assert len(w) == 1
assert w[0].category is DeprecationWarning
assert markitdown._llm_client == test_client
finally:
resetwarnings()

try:
with catch_warnings(record=True) as w:
markitdown = MarkItDown(mlm_model="gpt-4o")
assert len(w) == 1
assert w[0].category is DeprecationWarning
assert markitdown._llm_model == "gpt-4o"
finally:
resetwarnings()

try:
test_client = object()
markitdown = MarkItDown(mlm_client=test_client, llm_client=test_client)
assert False
except ValueError:
pass

try:
markitdown = MarkItDown(mlm_model="gpt-4o", llm_model="gpt-4o")
assert False
except ValueError:
pass


@pytest.mark.skipif(
skip_llm,
reason="do not run llm tests without a key",
Expand All @@ -267,4 +303,5 @@ def test_markitdown_llm() -> None:
test_markitdown_remote()
test_markitdown_local()
test_markitdown_exiftool()
test_markitdown_deprecation()
test_markitdown_llm()
Loading