Skip to content

Commit

Permalink
resorted
Browse files (browse the repository at this point in the history)
  • Loading branch information
soldni committed Jan 29, 2024
1 parent: dd1a848; commit: 4cabad8
Show file tree
Hide file tree
Showing 11 changed files with 17 additions and 6 deletions.
3 changes: 2 additions & 1 deletion python/dolma/cli/deduper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from typing import Any, Dict, List, Optional

import smart_open
from omegaconf import OmegaConf as om

from dolma import deduper
from dolma.cli import BaseCli, field, print_config
from dolma.cli.shared import WorkDirConfig, get_path_to_temp_file, make_workdirs
from dolma.core.errors import DolmaConfigError
from dolma.core.loggers import get_logger
from dolma.core.paths import glob_path, is_local
from omegaconf import OmegaConf as om


@dataclass
Expand Down
5 changes: 3 additions & 2 deletions python/dolma/cli/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from pstats import SortKey
from typing import List, Optional

from rich.console import Console
from rich.table import Table

from dolma.cli import BaseCli, field, print_config
from dolma.cli.shared import WorkDirConfig, make_workdirs
from dolma.core.errors import DolmaConfigError
Expand All @@ -10,8 +13,6 @@
from dolma.core.registry import TaggerRegistry
from dolma.core.runtime import create_and_run_tagger
from dolma.core.utils import import_modules
from rich.console import Console
from rich.table import Table


@dataclass
Expand Down
1 change: 1 addition & 0 deletions python/dolma/core/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import msgspec
import smart_open

from dolma.core.taggers import BaseTaggerWithMetadata

from .data_types import (
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_binning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

import numpy as np

from dolma.core.binning import (
FixedBucketsValTracker,
InferBucketsValTracker,
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import unittest

from bs4 import BeautifulSoup

from dolma.core.data_types import Document, DocumentWithMetadata
from dolma.taggers.code import (
CodeCopyrightTagger,
Expand Down
3 changes: 2 additions & 1 deletion tests/python/test_omegaconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
from dataclasses import dataclass
from unittest import TestCase

from dolma.cli import field, make_parser, namespace_to_nested_omegaconf
from omegaconf import MissingMandatoryValue

from dolma.cli import field, make_parser, namespace_to_nested_omegaconf


@dataclass
class _1:
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from unittest import TestCase

import smart_open

from dolma.core.parallel import BaseParallelProcessor, QueueType

LOCAL_DATA = Path(__file__).parent.parent / "data"
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_repetitions_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from unittest import TestCase

import numpy as np

from dolma.taggers.repetitions.utils import (
find_end_first_consecutive_true,
find_periodic_sequences,
Expand Down
1 change: 1 addition & 0 deletions tests/python/test_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from unittest import TestCase

import smart_open

from dolma.core.runtime import (
_make_paths_from_prefix,
_make_paths_from_substitution,
Expand Down
3 changes: 2 additions & 1 deletion tests/python/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

import numpy
import smart_open
from tokenizers import Tokenizer as BaseTokenizer

from dolma.cli.__main__ import main
from dolma.tokenizer import Tokenizer
from tokenizers import Tokenizer as BaseTokenizer

TEST_DIR = Path(__file__).parent.parent.resolve()

Expand Down
3 changes: 2 additions & 1 deletion tests/python/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@

import boto3
import smart_open
from dolma.core.paths import glob_path, mkdir_p
from smart_open import open

from dolma.core.paths import glob_path, mkdir_p

DOLMA_TESTS_S3_PREFIX_ENV_VAR = "DOLMA_TESTS_S3_PREFIX"
DOLMA_TESTS_SKIP_AWS_ENV_VAR = "DOLMA_TESTS_SKIP_AWS"
DOLMA_TESTS_S3_PREFIX_DEFAULT = "s3://dolma-tests"
Expand Down

0 comments on commit 4cabad8

Please sign in to comment.