Skip to content

Commit

Permalink
feat: implement new option same-number-of-pages-compact.
Browse files Browse the repository at this point in the history
  • Loading branch information
wxgeo committed Dec 12, 2023
1 parent 07a837e commit 3273ab1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 4 deletions.
35 changes: 31 additions & 4 deletions ptyx/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ def directory(self) -> Path:
def __len__(self):
return len(self.info_dict)

def __getitem__(self, item: DocId | slice):
if isinstance(item, slice):
info_dict = {key: self.info_dict[key] for key in list(self.info_dict)[item]}
return MultipleFilesCompilationInfo(basename=self.basename, info_dict=info_dict)
else:
return self.info_dict[item]


class _LoggedStream(object):
"""Add logging to a data stream, like stdout or stderr.
Expand Down Expand Up @@ -291,7 +298,7 @@ def make_files(
# Pages number is set manually, and don't match.
print(f"Warning: skipping {info.src} (incorrect page number) !")
continue
elif options.same_number_of_pages:
elif options.same_number_of_pages or options.same_number_of_pages_compact:
# Determine automatically the best fixed page count.
# This is a bit subtle. We want all compiled documents to have
# the same pages number, yet we don't want to set it manually.
Expand All @@ -310,9 +317,29 @@ def make_files(

all_compilation_info.info_dict[doc_id] = info
doc_id = DocId(doc_id + 1)
for compil_info in pages_per_document.values():
if len(compil_info.doc_ids) > len(all_compilation_info.doc_ids):
all_compilation_info = compil_info
if options.same_number_of_pages_compact:
# In compact mode, we try to minimize the number of pages of the generated documents.
# To not increase too drastically the time of compilation, we adopt the following heuristic:
# we'll use the shorter documents, if their frequency exceed 25% of the total documents.
total = sum(len(compil_info.doc_ids) for compil_info in pages_per_document.values())
for page_count in sorted(pages_per_document):
if len(pages_per_document[page_count].doc_ids) > total / 4:
all_compilation_info = pages_per_document[page_count]
break
else:
# Exceptionally, if the length of each document is highly variable, each page count value
# may occur less than 25%. Then, we'll select the most frequent page count.
for compil_info in pages_per_document.values():
if len(compil_info.doc_ids) > len(all_compilation_info.doc_ids):
all_compilation_info = compil_info

if len(all_compilation_info) > target:
all_compilation_info = all_compilation_info[:target]

elif options.same_number_of_pages:
for compil_info in pages_per_document.values():
if len(compil_info.doc_ids) > len(all_compilation_info.doc_ids):
all_compilation_info = compil_info

assert len(all_compilation_info) == target, len(all_compilation_info)
filenames = all_compilation_info.pdf_paths
Expand Down
1 change: 1 addition & 0 deletions ptyx/compilation_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class CompilationOptions:
reorder_pages: Literal["brochure", "brochure-reversed", ""] = ""
set_number_of_pages: int = 0
same_number_of_pages: bool = False
same_number_of_pages_compact: bool = False
no_correction: bool = False
no_pdf: bool = False
view: bool = False
Expand Down
14 changes: 14 additions & 0 deletions ptyx/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,20 @@ def __init__(self):
" number of documents is always respected, contrary to `--set-number-of-pages` option."
),
)
group2.add_argument(
"-sc",
"--same-number-of-pages-compact",
action="store_true",
help=(
"Ensure that all documents have the same number of pages."
" Since the number of pages is automatically set, the requested"
" number of documents is always respected, contrary to `--set-number-of-pages` option."
" Try to minimize the number of pages per document."
" Note that this may increase significantly compilation time,"
" compared to `--same-number-of-pages` option"
" (the compilation may be up to 2 times slower)."
),
)
self.add_argument(
"-nc",
"--no-correction",
Expand Down

0 comments on commit 3273ab1

Please sign in to comment.