From dd5d226f6a377fbf3f98f714323921539a418d83 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 14 Nov 2024 15:52:59 +0500 Subject: [PATCH 01/76] fix: Count unique texts, data leaks in calculate metrics (#1438) * add more stat * add more stat * update statistics --- mteb/abstasks/AbsTask.py | 12 +- mteb/abstasks/AbsTaskBitextMining.py | 39 +- mteb/abstasks/AbsTaskClassification.py | 29 +- mteb/abstasks/AbsTaskClustering.py | 26 +- mteb/abstasks/AbsTaskClusteringFast.py | 21 +- mteb/abstasks/AbsTaskInstructionRetrieval.py | 98 +- .../AbsTaskMultilabelClassification.py | 42 +- mteb/abstasks/AbsTaskPairClassification.py | 42 +- mteb/abstasks/AbsTaskReranking.py | 60 +- mteb/abstasks/AbsTaskRetrieval.py | 58 +- mteb/abstasks/AbsTaskSTS.py | 34 +- mteb/abstasks/AbsTaskSummarization.py | 76 +- .../BitextMining/BUCC.v2.json | 69 + .../BitextMining/BornholmBitextMining.json | 11 +- .../BitextMining/IN22ConvBitextMining.json | 5577 +++- .../BitextMining/IN22GenBitextMining.json | 6595 ++++ .../BitextMining/IWSLT2017BitextMining.json | 329 + .../IndicGenBenchFloresBitextMining.json | 1540 + .../BitextMining/NTREXBitextMining.json | 24925 ++++++++++++++++ .../BitextMining/NollySentiBitextMining.json | 69 + .../NorwegianCourtsBitextMining.json | 15 + .../NusaTranslationBitextMining.json | 132 +- .../BitextMining/PhincBitextMining.json | 30 + .../TbilisiCityHallBitextMining.json | 43 + .../BitextMining/VieMedEVBitextMining.json | 15 + .../LanguageClassification.json | 76 + .../SlovakHateSpeechClassification.json | 22 + .../ArXivHierarchicalClusteringP2P.json | 4 + .../Clustering/BiorxivClusteringS2S.json | 5 + .../Clustering/MedrxivClusteringP2P.v2.json | 168 + .../Clustering/MedrxivClusteringS2S.v2.json | 168 + .../Clustering/RedditClusteringP2P.v2.json | 1335 + .../RuSciBenchGRNTIClusteringP2P.json | 4 + .../TwentyNewsgroupsClustering.v2.json | 75 + .../Clustering/WikiClusteringP2P.json | 75 + .../Core17InstructionRetrieval.json | 18 +- .../CEDRClassification.json | 
43 +- .../MultiEURLEXMultilabelClassification.json | 1732 -- .../PawsXPairClassification.json | 160 +- .../PairClassification/TwitterURLCorpus.json | 10 +- .../PairClassification/XNLI.json | 300 +- .../Reranking/AskUbuntuDupQuestions.json | 15 +- .../Reranking/ESCIReranking.json | 60 +- .../WikipediaRerankingMultilingual.json | 255 +- .../Retrieval/AppsRetrieval.json | 17 +- .../Retrieval/BelebeleRetrieval.json | 6413 +++- .../Retrieval/COIRCodeSearchNetRetrieval.json | 117 +- .../Retrieval/CodeEditSearchRetrieval.json | 236 +- .../Retrieval/CodeFeedbackMT.json | 17 +- .../Retrieval/CodeFeedbackST.json | 17 +- .../Retrieval/CodeSearchNetCCRetrieval.json | 117 +- .../Retrieval/CodeSearchNetRetrieval.json | 117 +- .../Retrieval/CodeTransOceanContest.json | 17 +- .../Retrieval/CodeTransOceanDL.json | 17 +- mteb/descriptive_stats/Retrieval/CosQA.json | 17 +- .../Retrieval/JaqketRetrieval.json | 17 +- .../descriptive_stats/Retrieval/NFCorpus.json | 11 + .../Retrieval/StackOverflowQA.json | 17 +- .../Retrieval/SyntheticText2SQL.json | 17 +- .../Retrieval/Touche2020.json | 17 +- .../Retrieval/Touche2020Retrieval.v3.json | 17 +- ...lowIRCrossLingualInstructionRetrieval.json | 70 +- .../mFollowIRInstructionRetrieval.json | 70 +- mteb/descriptive_stats/STS/STS12.json | 10 +- mteb/descriptive_stats/STS/STS17.json | 118 +- .../Summarization/SummEval.json | 53 +- tests/test_benchmark/mock_tasks.py | 652 +- tests/test_tasks/test_metadata.py | 4 +- 68 files changed, 47767 insertions(+), 4820 deletions(-) create mode 100644 mteb/descriptive_stats/BitextMining/BUCC.v2.json create mode 100644 mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NTREXBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json 
create mode 100644 mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/PhincBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json create mode 100644 mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json create mode 100644 mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json create mode 100644 mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json delete mode 100644 mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json create mode 100644 mteb/descriptive_stats/Retrieval/NFCorpus.json diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 0bdbdeaf84..8b9edfd52c 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -200,7 +200,11 @@ def calculate_metadata_metrics( descriptive_stats = {} hf_subset_stat = "hf_subset_descriptive_stats" - pbar_split = tqdm.tqdm(self.metadata.eval_splits, desc="Processing Splits...") + eval_splits = self.metadata.eval_splits + if self.metadata.type in ["Classification", "MultilabelClassification"]: + eval_splits += ["train"] + + pbar_split = tqdm.tqdm(eval_splits, desc="Processing Splits...") for split in pbar_split: pbar_split.set_postfix_str(f"Split: {split}") logger.info(f"Processing metadata for split {split}") @@ -215,12 +219,8 @@ def calculate_metadata_metrics( if isinstance(self.metadata.eval_langs, dict) else self.metadata.eval_langs ) - if self.metadata.type == "Classification": - eval_langs += ["train"] - pbar_subsets = tqdm.tqdm( - self.metadata.eval_langs, desc="Processing Languages..." 
- ) + pbar_subsets = tqdm.tqdm(eval_langs, desc="Processing Languages...") for hf_subset in pbar_subsets: pbar_subsets.set_postfix_str(f"Language: {hf_subset}") logger.info(f"Processing metadata for language {hf_subset}") diff --git a/mteb/abstasks/AbsTaskBitextMining.py b/mteb/abstasks/AbsTaskBitextMining.py index 00a9160b9b..59d64039fd 100644 --- a/mteb/abstasks/AbsTaskBitextMining.py +++ b/mteb/abstasks/AbsTaskBitextMining.py @@ -21,14 +21,31 @@ class BitextDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + unique_pairs: Number of duplicate pairs + + min_sentence1_length: Minimum length of sentence1 average_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of duplicates in sentence1 + + min_sentence2_length: Minimum length of sentence2 average_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 """ num_samples: int number_of_characters: int + unique_pairs: int + + min_sentence1_length: int average_sentence1_length: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int average_sentence2_length: float + max_sentence2_length: int + unique_sentence2: int class AbsTaskBitextMining(AbsTask): @@ -153,12 +170,24 @@ def _calculate_metrics_from_split( sent_1, sent_2 = pairs_cols[0] sentence1 = self.dataset[split][sent_1] sentence2 = self.dataset[split][sent_2] - total_s1_len = sum([len(s1) for s1 in sentence1]) - total_s2_len = sum([len(s2) for s2 in sentence2]) - + s1_len = [len(s1) for s1 in sentence1] + s2_len = [len(s2) for s2 in sentence2] + total_s1_len = sum(s1_len) + total_s2_len = sum(s2_len) + + unique_pairs = len(set(zip(sentence1, sentence2))) + unique_sentence1 = len(set(sentence1)) + unique_sentence2 = len(set(sentence2)) return BitextDescriptiveStatistics( - 
average_sentence1_length=total_s1_len / len(sentence1), - average_sentence2_length=total_s2_len / len(sentence2), num_samples=len(sentence1), number_of_characters=total_s1_len + total_s2_len, + unique_pairs=unique_pairs, + min_sentence1_length=min(s1_len), + average_sentence1_length=sum(s1_len) / len(sentence1), + max_sentence1_length=max(s1_len), + unique_sentence1=unique_sentence1, + min_sentence2_length=min(s2_len), + average_sentence2_length=total_s2_len / len(sentence2), + max_sentence2_length=max(s2_len), + unique_sentence2=unique_sentence2, ) diff --git a/mteb/abstasks/AbsTaskClassification.py b/mteb/abstasks/AbsTaskClassification.py index 58b4441a13..62908c98a4 100644 --- a/mteb/abstasks/AbsTaskClassification.py +++ b/mteb/abstasks/AbsTaskClassification.py @@ -26,14 +26,26 @@ class ClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + num_texts_in_train: Number of texts in the train split + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_text: Number of unique texts + unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + num_texts_in_train: int | None + + min_text_length: int average_text_length: float + max_text_length: int + unique_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -205,25 +217,40 @@ def _undersample_data(self, X, y, samples_per_label: int, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> ClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] 
label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + train_text = self.dataset["train"]["text"] - total_text_len = sum([len(t) for t in text]) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) label_count = Counter(label) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) return ClassificationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len, + num_texts_in_train=num_texts_in_train, + min_text_length=min(text_len), average_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_text=len(set(text)), unique_labels=len(label_count), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskClustering.py b/mteb/abstasks/AbsTaskClustering.py index 7f2c94e144..3b5d0f492d 100644 --- a/mteb/abstasks/AbsTaskClustering.py +++ b/mteb/abstasks/AbsTaskClustering.py @@ -24,16 +24,31 @@ class ClusteringDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. 
+ + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -96,7 +111,11 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + all_sentences = [] + for s in sentences: + all_sentences.extend(s) + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -107,8 +126,13 @@ def _calculate_metrics_from_split( return ClusteringDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + unique_texts=len(set(all_sentences)), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskClusteringFast.py b/mteb/abstasks/AbsTaskClusteringFast.py index fedf392f71..40e36d29e2 100644 --- a/mteb/abstasks/AbsTaskClusteringFast.py +++ b/mteb/abstasks/AbsTaskClusteringFast.py @@ -85,16 +85,30 @@ class ClusteringFastDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the 
dataset. number_of_characters: Total number of symbols in the dataset. + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_labels_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_labels_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -226,7 +240,8 @@ def _calculate_metrics_from_split( sentences = self.dataset[split]["sentences"] labels = self.dataset[split]["labels"] - total_text_len = sum([len(t) for t in sentences]) + text_len = [len(t) for t in sentences] + total_text_len = sum(text_len) total_labels = [] for label in labels: if isinstance(label, list): @@ -237,8 +252,12 @@ def _calculate_metrics_from_split( return ClusteringFastDescriptiveStatistics( num_samples=len(sentences), number_of_characters=total_text_len, + min_text_length=min(text_len), average_text_length=total_text_len / len(sentences), + max_text_length=max(text_len), + min_labels_per_text=min(label_counter.values()), average_labels_per_text=len(total_labels) / len(sentences), + max_labels_per_text=max(label_counter.values()), unique_labels=len(label_counter), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskInstructionRetrieval.py b/mteb/abstasks/AbsTaskInstructionRetrieval.py index bdbe5cd6c5..219426fe63 100644 --- a/mteb/abstasks/AbsTaskInstructionRetrieval.py +++ b/mteb/abstasks/AbsTaskInstructionRetrieval.py @@ -37,6 +37,7 @@ def __init__( qrels_file: str = "", streaming: bool = False, keep_in_memory: bool = False, + trust_remote_code: bool = False, ): 
self.corpus = {} self.queries = {} @@ -69,6 +70,7 @@ def __init__( self.qrels_file = qrels_file self.streaming = streaming self.keep_in_memory = keep_in_memory + self.trust_remote_code = trust_remote_code def load( self, split="test" @@ -227,24 +229,68 @@ class InstructionRetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: Number of queries num_docs: Number of documents number_of_characters: Total number of symbols in the dataset + + min_document_length: Minimum length of documents average_document_length: Average length of documents + max_document_length: Maximum length of documents + unique_docs: Number of unique documents + + min_query_length: Minimum length of queries average_query_length: Average length of queries + max_query_length: Maximum length of queries + unique_queries: Number of unique queries + + min_instruction_length: Minimum length of instructions average_instruction_length: Average length of instructions + max_instruction_length: Maximum length of instructions + unique_instructions: Number of unique instructions + + min_changed_instruction_length: Minimum length of changed instructions average_changed_instruction_length: Average length of changed instructions + max_changed_instruction_length: Maximum length of changed instructions + unique_changed_instructions: Number of unique changed instructions + + min_average_relevant_docs_per_query: Minimum number of relevant docs per query average_relevant_docs_per_query: Average number of relevant docs per query + max_average_relevant_docs_per_query: Maximum number of relevant docs per query + + min_average_top_ranked_per_query: Minimum number of top ranked docs per query average_top_ranked_per_query: Average number of top ranked docs per query + max_average_top_ranked_per_query: Maximum number of top ranked docs per query """ num_samples: int num_queries: int num_docs: int number_of_characters: int + + min_document_length: int average_document_length: float + max_document_length: int + 
unique_docs: int + + min_query_length: int average_query_length: float + max_query_length: int + unique_queries: int + + min_instruction_length: int average_instruction_length: float + max_instruction_length: int + unique_instructions: int + + min_changed_instruction_length: int average_changed_instruction_length: float + max_changed_instruction_length: int + unique_changed_instructions: int + + min_average_relevant_docs_per_query: float average_relevant_docs_per_query: float + max_average_relevant_docs_per_query: float + + min_average_top_ranked_per_query: float average_top_ranked_per_query: float + max_average_top_ranked_per_query: float class AbsTaskInstructionRetrieval(AbsTask): @@ -665,25 +711,31 @@ def _calculate_metrics_from_split( changed_instructions = self.changed_instructions[split] top_ranked = self.top_ranked[split] - total_corpus_len = sum( - [len(doc.get("title", "")) + len(doc["text"]) for doc in corpus.values()] - ) - total_queries_len = sum([len(query) for query in queries.values()]) - total_instructions_len = sum( - [len(instruction) for instruction in og_instructions.values()] - ) - total_changed_instructions_len = sum( - [len(instruction) for instruction in changed_instructions.values()] - ) - num_qrels_non_zero = sum( + corpus_combined = [ + doc.get("title", "") + doc["text"] for doc in corpus.values() + ] + corpus_len = [len(doc) for doc in corpus_combined] + total_corpus_len = sum(corpus_len) + + queries_len = [len(query) for query in queries.values()] + total_queries_len = sum(queries_len) + instructions_len = [ + len(instruction) for instruction in og_instructions.values() + ] + total_instructions_len = sum(instructions_len) + changed_instructions_len = [ + len(instruction) for instruction in changed_instructions.values() + ] + total_changed_instructions_len = sum(changed_instructions_len) + qrels_non_zero = [ sum(1 for doc_id in docs if docs[doc_id] != 0) for docs in relevant_docs.values() - ) + ] + num_qrels_non_zero = 
sum(qrels_non_zero) qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if len(queries) else 0 + ranked_per_query = [len(docs) for docs in top_ranked.values()] top_ranked_per_query = ( - sum(len(docs) for docs in top_ranked.values()) / len(queries) - if len(queries) - else 0 + sum(ranked_per_query) / len(queries) if len(queries) else 0 ) return InstructionRetrievalDescriptiveStatistics( num_samples=len(queries) + len(corpus), @@ -693,20 +745,36 @@ def _calculate_metrics_from_split( + total_queries_len + total_instructions_len + total_changed_instructions_len, + min_document_length=min(corpus_len), average_document_length=( total_corpus_len / len(corpus) if len(corpus) else 0 ), + max_document_length=max(corpus_len), + unique_docs=len(set(corpus_combined)), + min_query_length=min(queries_len), average_query_length=( total_queries_len / len(queries) if len(queries) else 0 ), + max_query_length=max(queries_len), + unique_queries=len(set(queries.values())), + min_instruction_length=min(instructions_len), average_instruction_length=( total_instructions_len / len(queries) if len(queries) else 0 ), + max_instruction_length=max(instructions_len), + unique_instructions=len(set(og_instructions.values())), + min_changed_instruction_length=min(changed_instructions_len), average_changed_instruction_length=( total_changed_instructions_len / len(queries) if len(queries) else 0 ), + max_changed_instruction_length=max(changed_instructions_len), + unique_changed_instructions=len(set(changed_instructions.values())), + min_average_relevant_docs_per_query=min(qrels_non_zero), average_relevant_docs_per_query=qrels_per_doc, + max_average_relevant_docs_per_query=max(qrels_non_zero), + min_average_top_ranked_per_query=min(ranked_per_query), average_top_ranked_per_query=top_ranked_per_query, + max_average_top_ranked_per_query=max(ranked_per_query), ) diff --git a/mteb/abstasks/AbsTaskMultilabelClassification.py b/mteb/abstasks/AbsTaskMultilabelClassification.py index 
6fd3acf905..38d3722ff2 100644 --- a/mteb/abstasks/AbsTaskMultilabelClassification.py +++ b/mteb/abstasks/AbsTaskMultilabelClassification.py @@ -47,16 +47,32 @@ class MultilabelClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. + number_texts_in_train: Number of texts in the train split + + min_text_length: Minimum length of text average_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_labels_per_text: Minimum number of labels per text average_label_per_text: Average number of labels per text + max_labels_per_text: Maximum number of labels per text unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int + number_texts_in_train: int | None + + min_text_length: int average_text_length: float + max_text_length: int + unique_texts: int + + min_labels_per_text: int average_label_per_text: float + max_labels_per_text: int unique_labels: int labels: dict[str, dict[str, int]] @@ -231,30 +247,48 @@ def _undersample_data_indices(self, y, samples_per_label, idxs=None): def _calculate_metrics_from_split( self, split: str, hf_subset: str | None = None, compute_overall: bool = False ) -> MultilabelClassificationDescriptiveStatistics: + train_text = [] if hf_subset: text = self.dataset[hf_subset][split]["text"] label = self.dataset[hf_subset][split]["label"] + if split != "train": + train_text = self.dataset[hf_subset]["train"]["text"] elif compute_overall: text = [] label = [] for hf_subset in self.metadata.eval_langs: text.extend(self.dataset[hf_subset][split]["text"]) label.extend(self.dataset[hf_subset][split]["label"]) + if split != "train": + train_text.extend(self.dataset[hf_subset]["train"]["text"]) else: text = self.dataset[split]["text"] label = self.dataset[split]["label"] + if split != "train": + 
train_text = self.dataset["train"]["text"] - total_text_len = sum(len(t) for t in text) - total_label_len = sum(len(l) for l in label) + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + label_len = [len(l) for l in label] + total_label_len = sum(label_len) total_labels = [] for l in label: total_labels.extend(l if len(l) > 0 else [None]) label_count = Counter(total_labels) + num_texts_in_train = ( + len(set(text) & set(train_text)) if split != "train" else None + ) return MultilabelClassificationDescriptiveStatistics( - average_text_length=total_text_len / len(text), + num_samples=len(text), number_of_characters=total_text_len, + number_texts_in_train=num_texts_in_train, + min_text_length=min(text_len), + average_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_texts=len(set(text)), + min_labels_per_text=min(label_len), average_label_per_text=total_label_len / len(label), - num_samples=len(text), + max_labels_per_text=max(label_len), unique_labels=len(label_count), labels={ str(label): { diff --git a/mteb/abstasks/AbsTaskPairClassification.py b/mteb/abstasks/AbsTaskPairClassification.py index 0cbdafda8b..82ba128c28 100644 --- a/mteb/abstasks/AbsTaskPairClassification.py +++ b/mteb/abstasks/AbsTaskPairClassification.py @@ -20,16 +20,34 @@ class PairClassificationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. 
- avg_sentence1_len: Average length of sentence1 - avg_sentence2_len: Average length of sentence2 + + min_sentence1_length: Minimum length of sentence1 + avg_sentence1_length: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + unique_sentence1: Number of unique sentence + + min_sentence2_length: Minimum length of sentence2 + avg_sentence2_length: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 + unique_sentence2: Number of unique sentence + unique_labels: Number of unique labels labels: dict of label frequencies """ num_samples: int number_of_characters: int - avg_sentence1_len: float - avg_sentence2_len: float + + min_sentence1_length: int + avg_sentence1_length: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int + avg_sentence2_length: float + max_sentence2_length: int + unique_sentence2: int + unique_labels: int labels: dict[str, dict[str, int]] @@ -109,14 +127,22 @@ def _calculate_metrics_from_split( dataset["labels"][0] if len(dataset["labels"]) == 1 else dataset["labels"] ) - total_sentence1_len = sum([len(sentence) for sentence in sentence1]) - total_sentence2_len = sum([len(sentence) for sentence in sentence2]) + sentence1_len = [len(sentence) for sentence in sentence1] + total_sentence1_len = sum(sentence1_len) + sentence2_len = [len(sentence) for sentence in sentence2] + total_sentence2_len = sum(sentence2_len) label_count = Counter(labels) return PairClassificationDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, - avg_sentence1_len=total_sentence1_len / len(sentence1), - avg_sentence2_len=total_sentence2_len / len(sentence2), + min_sentence1_length=min(sentence1_len), + avg_sentence1_length=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), + 
avg_sentence2_length=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), unique_labels=len(set(labels)), labels={ str(label): {"count": count} for label, count in label_count.items() diff --git a/mteb/abstasks/AbsTaskReranking.py b/mteb/abstasks/AbsTaskReranking.py index 3703b5a3c2..ab00a53a39 100644 --- a/mteb/abstasks/AbsTaskReranking.py +++ b/mteb/abstasks/AbsTaskReranking.py @@ -20,18 +20,42 @@ class RerankingDescriptiveStatistics(DescriptiveStatistics): number_of_characters: Total number of symbols in the dataset. num_positive: Number of positive examples num_negative: Number of negative examples - avg_query_len: Average length of queries - avg_positive_len: Average length of positive examples - avg_negative_len: Average length of negative examples + + min_query_length: Minimum length of queries + avg_query_length: Average length of queries + max_query_length: Maximum length of queries + unique_query: Number of unique queries + + min_positive_length: Minimum length of positive examples + avg_positive_length: Average length of positive examples + max_positive_length: Maximum length of positive examples + unique_positive: Number of unique positive examples + + min_negative_length: Minimum length of negative examples + avg_negative_length: Average length of negative examples + max_negative_length: Maximum length of negative examples + unique_negative: Number of unique negative examples """ num_samples: int number_of_characters: int num_positive: int num_negative: int - avg_query_len: float - avg_positive_len: float - avg_negative_len: float + + min_query_length: int + avg_query_length: float + max_query_length: int + unique_query: int + + min_positive_length: int + avg_positive_length: float + max_positive_length: int + unique_positive: int + + min_negative_length: int + avg_negative_length: float + max_negative_length: int + unique_negative: int class AbsTaskReranking(AbsTask): @@ -98,9 +122,12 
@@ def _calculate_metrics_from_split( positive = transform_reranking_data(self.dataset[split]["positive"]) negative = transform_reranking_data(self.dataset[split]["negative"]) - total_len_query = sum([len(q) for q in query]) - total_len_positive = sum([len(p) for p in positive]) - total_len_negative = sum([len(n) for n in negative]) + len_query = [len(q) for q in query] + total_len_query = sum(len_query) + len_positive = [len(p) for p in positive] + total_len_positive = sum(len_positive) + len_negative = [len(n) for n in negative] + total_len_negative = sum(len_negative) return RerankingDescriptiveStatistics( num_samples=len(query), number_of_characters=total_len_query @@ -108,9 +135,18 @@ def _calculate_metrics_from_split( + total_len_negative, num_positive=len(positive), num_negative=len(negative), - avg_query_len=total_len_query / len(query), - avg_positive_len=total_len_positive / len(positive), - avg_negative_len=total_len_negative / len(negative), + min_query_length=min(len_query), + avg_query_length=total_len_query / len(query), + max_query_length=max(len_query), + unique_query=len(set(query)), + min_positive_length=min(len_positive), + avg_positive_length=total_len_positive / len(positive), + max_positive_length=max(len_positive), + unique_positive=len(set(positive)), + min_negative_length=min(len_negative), + avg_negative_length=total_len_negative / len(negative), + max_negative_length=max(len_negative), + unique_negative=len(set(negative)), ) diff --git a/mteb/abstasks/AbsTaskRetrieval.py b/mteb/abstasks/AbsTaskRetrieval.py index 078979b6bf..95746e1a2d 100644 --- a/mteb/abstasks/AbsTaskRetrieval.py +++ b/mteb/abstasks/AbsTaskRetrieval.py @@ -206,18 +206,42 @@ class RetrievalDescriptiveStatistics(DescriptiveStatistics): num_queries: number of queries in the dataset num_documents: Number of documents number_of_characters: Total number of symbols in the dataset + + min_document_length: Minimum length of documents average_document_length: Average length of 
documents + max_document_length: Maximum length of documents + unique_documents: Number of unique documents + + min_query_length: Minimum length of queries average_query_length: Average length of queries + max_query_length: Maximum length of queries + unique_queries: Number of unique queries + + min_relevant_docs_per_query: Minimum number of relevant documents per query average_relevant_docs_per_query: Average number of relevant documents per query + max_relevant_docs_per_query: Maximum number of relevant documents per query + unique_relevant_docs: Number of unique relevant documents """ num_samples: int num_queries: int num_documents: int number_of_characters: int + + min_document_length: int average_document_length: float + max_document_length: int + unique_documents: int + + min_query_length: int average_query_length: float + max_query_length: int + unique_queries: int + + min_relevant_docs_per_query: int average_relevant_docs_per_query: float + max_relevant_docs_per_query: int + unique_relevant_docs: int class AbsTaskRetrieval(AbsTask): @@ -436,26 +460,36 @@ def _calculate_metrics_from_split( num_documents = len(corpus) num_queries = len(queries) - # number of qrels that are not 0 - num_qrels_non_zero = sum( - sum(1 for doc_id in docs if docs[doc_id] != 0) - for docs in relevant_docs.values() - ) - qrels_per_doc = num_qrels_non_zero / len(relevant_docs) if num_queries else 0 + # create a list of number of relevant docs per query + qrels_lengths = [ + len(relevant_docs[qid]) for qid in relevant_docs if qid in queries + ] + num_qrels = sum(qrels_lengths) + qrels_per_doc = num_qrels / len(relevant_docs) if num_queries else 0 + unique_qrels = len({doc for qid in relevant_docs for doc in relevant_docs[qid]}) return RetrievalDescriptiveStatistics( - number_of_characters=query_len + doc_len, + number_of_characters=sum(query_len) + sum(doc_len), num_samples=num_documents + num_queries, num_queries=num_queries, num_documents=num_documents, - 
average_document_length=doc_len / num_documents, - average_query_length=query_len / num_queries, + min_document_length=min(doc_len), + average_document_length=sum(doc_len) / num_documents, + max_document_length=max(doc_len), + unique_documents=len(set(corpus)), + min_query_length=min(query_len), + average_query_length=sum(query_len) / num_queries, + max_query_length=max(query_len), + unique_queries=len(set(queries)), + min_relevant_docs_per_query=min(qrels_lengths), average_relevant_docs_per_query=qrels_per_doc, + max_relevant_docs_per_query=max(qrels_lengths), + unique_relevant_docs=unique_qrels, ) def calculate_length( queries: dict[str, str], corpus: dict[str, str] -) -> tuple[int, int]: +) -> tuple[list[int], list[int]]: queries_lens = [] doc_lens = [] for query in queries.values(): @@ -467,9 +501,7 @@ def calculate_length( for doc in corpus.values(): doc_lens.append(len(doc)) - doc_len = sum(doc_lens) / len(doc_lens) if doc_lens else 0 - query_len = sum(queries_lens) / len(queries_lens) if queries_lens else 0 - return query_len, doc_len + return doc_lens, queries_lens def process_docs( diff --git a/mteb/abstasks/AbsTaskSTS.py b/mteb/abstasks/AbsTaskSTS.py index c9fa896b69..d12b88545d 100644 --- a/mteb/abstasks/AbsTaskSTS.py +++ b/mteb/abstasks/AbsTaskSTS.py @@ -17,16 +17,36 @@ class STSDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. 
+ + min_sentence1_length: Minimum length of sentence1 average_sentence1_len: Average length of sentence1 + max_sentence1_length: Maximum length of sentence1 + + min_sentence2_length: Minimum length of sentence2 average_sentence2_len: Average length of sentence2 + max_sentence2_length: Maximum length of sentence2 + + min_score: Minimum score avg_score: Average score + max_score: Maximum score """ num_samples: int number_of_characters: int + + min_sentence1_length: int average_sentence1_len: float + max_sentence1_length: int + unique_sentence1: int + + min_sentence2_length: int average_sentence2_len: float + max_sentence2_length: int + unique_sentence2: int + + min_score: float avg_score: float + max_score: float class AbsTaskSTS(AbsTask): @@ -93,13 +113,23 @@ def _calculate_metrics_from_split( sentence2 = self.dataset[split]["sentence2"] score = self.dataset[split]["score"] - total_sentence1_len = sum([len(s) for s in sentence1]) - total_sentence2_len = sum([len(s) for s in sentence2]) + sentence1_len = [len(s) for s in sentence1] + sentence2_len = [len(s) for s in sentence2] + total_sentence1_len = sum(sentence1_len) + total_sentence2_len = sum(sentence2_len) avg_score = sum(score) / len(score) return STSDescriptiveStatistics( num_samples=len(sentence1), number_of_characters=total_sentence1_len + total_sentence2_len, + min_sentence1_length=min(sentence1_len), average_sentence1_len=total_sentence1_len / len(sentence1), + max_sentence1_length=max(sentence1_len), + unique_sentence1=len(set(sentence1)), + min_sentence2_length=min(sentence2_len), average_sentence2_len=total_sentence2_len / len(sentence2), + max_sentence2_length=max(sentence2_len), + unique_sentence2=len(set(sentence2)), + min_score=min(score), avg_score=avg_score, + max_score=max(score), ) diff --git a/mteb/abstasks/AbsTaskSummarization.py b/mteb/abstasks/AbsTaskSummarization.py index 6d792c3199..07fd420571 100644 --- a/mteb/abstasks/AbsTaskSummarization.py +++ b/mteb/abstasks/AbsTaskSummarization.py @@ 
-21,18 +21,48 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics): Attributes: num_samples: number of samples in the dataset. number_of_characters: Total number of symbols in the dataset. - avg_text_len: Average length of text - avg_human_summaries_len: Average length of human summaries - avg_machine_summaries_len: Average length of machine summaries + + min_text_length: Minimum length of text + avg_text_length: Average length of text + max_text_length: Maximum length of text + unique_texts: Number of unique texts + + min_human_summaries_length: Minimum length of human summaries + avg_human_summaries_length: Average length of human summaries + max_human_summaries_length: Maximum length of human summaries + unique_human_summaries: Number of unique human summaries + + min_machine_summaries_length: Minimum length of machine summaries + avg_machine_summaries_length: Average length of machine summaries + max_machine_summaries_length: Maximum length of machine summaries + unique_machine_summaries: Number of unique machine summaries + + min_relevance: Minimum relevance score avg_relevance: Average relevance score + max_relevance: Maximum relevance score """ num_samples: int number_of_characters: int - avg_text_len: float - avg_human_summaries_len: float - avg_machine_summaries_len: float + + min_text_length: int + avg_text_length: float + max_text_length: int + unique_texts: int + + min_human_summaries_length: int + avg_human_summaries_length: float + max_human_summaries_length: int + unique_human_summaries: int + + min_machine_summaries_length: int + avg_machine_summaries_length: float + max_machine_summaries_length: int + unique_machine_summaries: int + + min_relevance: float avg_relevance: float + max_relevance: float class AbsTaskSummarization(AbsTask): @@ -112,17 +142,39 @@ def _calculate_metrics_from_split( machine_summaries = self.dataset[split]["machine_summaries"] relevance = self.dataset[split]["relevance"] - total_text_len = sum(len(x) for x in 
text) - total_human_summaries_len = sum(len(x) for x in human_summaries) - total_machine_summaries_len = sum(len(x) for x in machine_summaries) + all_human_summaries = [] + for s in human_summaries: + all_human_summaries.extend(s) + + all_machine_summaries = [] + for s in machine_summaries: + all_machine_summaries.extend(s) + + text_len = [len(t) for t in text] + total_text_len = sum(text_len) + human_summaries_len = [len(s) for s in human_summaries] + total_human_summaries_len = sum(human_summaries_len) + machine_summaries_len = [len(s) for s in machine_summaries] + total_machine_summaries_len = sum(machine_summaries_len) total_relevance = sum(sum(x) / len(x) for x in relevance) return SummarizationDescriptiveStatistics( num_samples=len(text), number_of_characters=total_text_len + total_human_summaries_len + total_machine_summaries_len, - avg_text_len=total_text_len / len(text), - avg_human_summaries_len=total_human_summaries_len / len(text), - avg_machine_summaries_len=total_machine_summaries_len / len(text), + min_text_length=min(text_len), + avg_text_length=total_text_len / len(text), + max_text_length=max(text_len), + unique_texts=len(set(text)), + min_human_summaries_length=min(human_summaries_len), + avg_human_summaries_length=total_human_summaries_len / len(text), + max_human_summaries_length=max(human_summaries_len), + unique_human_summaries=len(set(all_human_summaries)), + min_machine_summaries_length=min(machine_summaries_len), + avg_machine_summaries_length=total_machine_summaries_len / len(text), + max_machine_summaries_length=max(machine_summaries_len), + unique_machine_summaries=len(set(all_machine_summaries)), + min_relevance=min(relevance), avg_relevance=total_relevance / len(relevance), + max_relevance=max(relevance), ) diff --git a/mteb/descriptive_stats/BitextMining/BUCC.v2.json b/mteb/descriptive_stats/BitextMining/BUCC.v2.json new file mode 100644 index 0000000000..75ef75ced5 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/BUCC.v2.json 
@@ -0,0 +1,69 @@ +{ + "test": { + "num_samples": 35000, + "number_of_characters": 6640032, + "unique_pairs": 34978, + "min_sentence1_length": 16, + "average_sentence1_length": 99.10931428571429, + "max_sentence1_length": 204, + "unique_sentence1": 34978, + "min_sentence2_length": 42, + "average_sentence2_length": 90.60588571428572, + "max_sentence2_length": 159, + "unique_sentence2": 25306, + "hf_subset_descriptive_stats": { + "de-en": { + "num_samples": 9580, + "number_of_characters": 1919197, + "unique_pairs": 9573, + "min_sentence1_length": 50, + "average_sentence1_length": 109.07974947807934, + "max_sentence1_length": 204, + "unique_sentence1": 9573, + "min_sentence2_length": 46, + "average_sentence2_length": 91.25396659707724, + "max_sentence2_length": 155, + "unique_sentence2": 9570 + }, + "fr-en": { + "num_samples": 9086, + "number_of_characters": 1677545, + "unique_pairs": 9081, + "min_sentence1_length": 43, + "average_sentence1_length": 99.31785163988553, + "max_sentence1_length": 174, + "unique_sentence1": 9081, + "min_sentence2_length": 42, + "average_sentence2_length": 85.3117983711204, + "max_sentence2_length": 159, + "unique_sentence2": 9076 + }, + "ru-en": { + "num_samples": 14435, + "number_of_characters": 2808206, + "unique_pairs": 14425, + "min_sentence1_length": 40, + "average_sentence1_length": 101.6593003117423, + "max_sentence1_length": 186, + "unique_sentence1": 14425, + "min_sentence2_length": 45, + "average_sentence2_length": 92.88216141323173, + "max_sentence2_length": 159, + "unique_sentence2": 14424 + }, + "zh-en": { + "num_samples": 1899, + "number_of_characters": 235084, + "unique_pairs": 1899, + "min_sentence1_length": 16, + "average_sentence1_length": 28.429699842022117, + "max_sentence1_length": 40, + "unique_sentence1": 1899, + "min_sentence2_length": 48, + "average_sentence2_length": 95.3638757240653, + "max_sentence2_length": 159, + "unique_sentence2": 1899 + } + } + } +} \ No newline at end of file diff --git 
a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json index 131c9966ac..0675e5e0ef 100644 --- a/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/BornholmBitextMining.json @@ -1,8 +1,15 @@ { "test": { + "num_samples": 500, + "number_of_characters": 44361, + "unique_pairs": 500, + "min_sentence1_length": 1, "average_sentence1_length": 49.834, + "max_sentence1_length": 555, + "unique_sentence1": 497, + "min_sentence2_length": 5, "average_sentence2_length": 38.888, - "num_samples": 500, - "number_of_characters": 44361 + "max_sentence2_length": 453, + "unique_sentence2": 491 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json index 507d9ad7bf..effafd237b 100644 --- a/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/IN22ConvBitextMining.json @@ -1,3045 +1,6594 @@ { "test": { - "average_sentence1_length": 54.32948595562498, - "average_sentence2_length": 54.32948595562498, "num_samples": 760518, "number_of_characters": 82637104, + "unique_pairs": 759283, + "min_sentence1_length": 3, + "average_sentence1_length": 54.32948595562498, + "max_sentence1_length": 239, + "unique_sentence1": 34430, + "min_sentence2_length": 3, + "average_sentence2_length": 54.32948595562498, + "max_sentence2_length": 239, + "unique_sentence2": 34430, "hf_subset_descriptive_stats": { "asm_Beng-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155988 + "max_sentence2_length": 178, + "unique_sentence2": 
1497 }, "asm_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 162044 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "asm_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 167032 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "asm_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160716 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "asm_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 156282 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "asm_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + 
"min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 158269 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "asm_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159964 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "asm_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 165177 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "asm_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164681 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "asm_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162408 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "asm_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 
172838, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172838 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "asm_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162747 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "asm_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157316 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "asm_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160906 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "asm_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, 
- "number_of_characters": 164223 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "asm_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 160201 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "asm_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158093 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "asm_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169379 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "asm_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 162623 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "asm_Beng-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174866, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 
53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174866 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "asm_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157690, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157690 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "asm_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.753825681969396, + "max_sentence1_length": 208, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 161305 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "ben_Beng-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 155988, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 155988 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "ben_Beng-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 210, + "unique_sentence2": 
1498 }, "ben_Beng-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 161436 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "ben_Beng-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 155120 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "ben_Beng-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 150686 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "ben_Beng-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 152673 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "ben_Beng-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + 
"min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 154368 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "ben_Beng-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 159581 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "ben_Beng-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159085, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 159085 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "ben_Beng-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 156812 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "ben_Beng-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 167242 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "ben_Beng-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157151, + 
"unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 157151 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "ben_Beng-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 151720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 151720 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "ben_Beng-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 155310 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "ben_Beng-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 158627 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "ben_Beng-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - 
"number_of_characters": 154605 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "ben_Beng-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 152497 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "ben_Beng-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 163783 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "ben_Beng-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 157027 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "ben_Beng-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 169270 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "ben_Beng-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 152094, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 
50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 152094 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "ben_Beng-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 155709, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.03060545575516, + "max_sentence1_length": 178, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 155709 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "brx_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162044, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162044 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "brx_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "brx_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 167492 + "max_sentence2_length": 209, + "unique_sentence2": 1499 
}, "brx_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161176 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "brx_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 156742 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "brx_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "brx_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 160424 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "brx_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, 
"average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 165637 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "brx_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165141 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "brx_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162868 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "brx_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173298 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "brx_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163207 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "brx_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, + 
"min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157776 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "brx_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "brx_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 164683 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "brx_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 160661 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "brx_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158553 + 
"max_sentence2_length": 181, + "unique_sentence2": 1500 }, "brx_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169839 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "brx_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163083 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "brx_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175326 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "brx_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158150 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "brx_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.05988023952096, + "max_sentence1_length": 210, 
+ "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 161765 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "doi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 167032, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 167032 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "doi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 161436, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 161436 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "doi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 167492, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 167492 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "doi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 166164 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "doi_Deva-gom_Deva": { + "num_samples": 1503, + 
"number_of_characters": 161730, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "doi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 163717 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "doi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 165412 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "doi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 170625 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "doi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - 
"num_samples": 1503, - "number_of_characters": 170129 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "doi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 167856 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "doi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 178286 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "doi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 168195 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "doi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 162764 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "doi_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 166354, + "unique_pairs": 1501, + "min_sentence1_length": 6, 
"average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 166354 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "doi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 169671 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "doi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 165649 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "doi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 163541 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "doi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 174827 + "max_sentence2_length": 225, + 
"unique_sentence2": 1500 }, "doi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 168071 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "doi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 180314 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "doi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 163138 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "doi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 57.37857618097139, + "max_sentence1_length": 209, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 166753 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "eng_Latn-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160716, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + 
"min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160716 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "eng_Latn-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155120, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155120 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "eng_Latn-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161176, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161176 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "eng_Latn-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166164, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166164 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "eng_Latn-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 155414 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "eng_Latn-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157401, 
+ "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 157401 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "eng_Latn-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159096 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "eng_Latn-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164309 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "eng_Latn-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163813 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "eng_Latn-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - 
"number_of_characters": 161540 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "eng_Latn-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171970 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "eng_Latn-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161879 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "eng_Latn-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "eng_Latn-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160038 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "eng_Latn-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163355, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, 
+ "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163355 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "eng_Latn-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 159333 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "eng_Latn-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157225 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "eng_Latn-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 168511 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "eng_Latn-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161755 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, 
"eng_Latn-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173998 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "eng_Latn-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156822 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "eng_Latn-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.17631403858949, + "max_sentence1_length": 201, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 160437 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "gom_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 156282, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 156282 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "gom_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 150686, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, 
"average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 150686 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "gom_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 156742, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 156742 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "gom_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "gom_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 155414, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 155414 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "gom_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 152967 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "gom_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, + 
"min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 154662 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "gom_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 159875 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "gom_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 159379 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "gom_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 157106 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "gom_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 167536 + 
"max_sentence2_length": 219, + "unique_sentence2": 1495 }, "gom_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 157445 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "gom_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 152014 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "gom_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 155604 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "gom_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 158921 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "gom_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, 
+ "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 154899 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "gom_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 152791, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 152791 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "gom_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 164077 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "gom_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 157321 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "gom_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 169564 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "gom_Deva-tel_Telu": { + "num_samples": 1503, + 
"number_of_characters": 152388, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 152388 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "gom_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 50.22621423819029, + "max_sentence1_length": 203, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 156003 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "guj_Gujr-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158269, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 158269 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "guj_Gujr-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152673, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152673 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "guj_Gujr-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - 
"num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "guj_Gujr-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163717, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163717 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "guj_Gujr-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157401, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 157401 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "guj_Gujr-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152967, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152967 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "guj_Gujr-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156649 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "guj_Gujr-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161862, + "unique_pairs": 1503, + "min_sentence1_length": 4, 
"average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161862 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "guj_Gujr-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "guj_Gujr-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 159093 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "guj_Gujr-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 169523 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "guj_Gujr-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 159432 + "max_sentence2_length": 221, + 
"unique_sentence2": 1501 }, "guj_Gujr-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 154001 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "guj_Gujr-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157591 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "guj_Gujr-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160908 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "guj_Gujr-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156886 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "guj_Gujr-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + 
"min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 154778 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "guj_Gujr-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 166064 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "guj_Gujr-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 159308 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "guj_Gujr-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 171551 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "guj_Gujr-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 154375 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "guj_Gujr-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157990, 
+ "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 51.54823685961411, + "max_sentence1_length": 205, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157990 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "hin_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 159964, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 159964 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "hin_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154368, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 154368 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "hin_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160424, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 160424 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "hin_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165412, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - 
"number_of_characters": 165412 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "hin_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159096, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 159096 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "hin_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154662, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 154662 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "hin_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156649, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 156649 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "hin_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 163557 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "hin_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163061, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, 
+ "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163061 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "hin_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 160788 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "hin_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171218 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "hin_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161127 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "hin_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 155696 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "hin_Deva-npi_Deva": 
{ + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 159286 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "hin_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 162603 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "hin_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 158581 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "hin_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 156473 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "hin_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 7, 
"average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 167759 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "hin_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161003 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "hin_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173246 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "hin_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156070 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "hin_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.67598137059215, + "max_sentence1_length": 192, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 159685 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "kan_Knda-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 165177, + "unique_pairs": 1503, 
+ "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 165177 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "kan_Knda-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 159581, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 159581 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "kan_Knda-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165637, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 165637 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "kan_Knda-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170625, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 170625 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "kan_Knda-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 164309, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 164309 + 
"max_sentence2_length": 201, + "unique_sentence2": 1497 }, "kan_Knda-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159875, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 159875 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "kan_Knda-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161862, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 161862 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "kan_Knda-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163557, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 163557 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "kan_Knda-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 168274 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "kan_Knda-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + 
"unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 166001 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "kan_Knda-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 176431, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 176431 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "kan_Knda-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 166340 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "kan_Knda-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 160909 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "kan_Knda-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 164499 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "kan_Knda-ory_Orya": { + "num_samples": 1503, + 
"number_of_characters": 167816, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 167816 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "kan_Knda-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 163794 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "kan_Knda-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 161686 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "kan_Knda-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172972 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "kan_Knda-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - 
"num_samples": 1503, - "number_of_characters": 166216 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "kan_Knda-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 178459 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "kan_Knda-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 161283 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "kan_Knda-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 56.14437791084497, + "max_sentence1_length": 201, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 164898 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "kas_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164681, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 164681 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "kas_Arab-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 159085, + "unique_pairs": 1503, + "min_sentence1_length": 5, 
"average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 159085 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "kas_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 165141, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 165141 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "kas_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 170129, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 170129 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "kas_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163813, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 163813 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "kas_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 159379, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 159379 + "max_sentence2_length": 203, + 
"unique_sentence2": 1500 }, "kas_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 161366, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "kas_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 163061, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 163061 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "kas_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 168274, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 168274 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "kas_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 165505 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "kas_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + 
"min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 175935 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "kas_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 165844 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "kas_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 160413 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "kas_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 164003 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "kas_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 167320 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "kas_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 163298, 
+ "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 163298 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "kas_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 161190 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "kas_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172476 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "kas_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 165720 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "kas_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - 
"number_of_characters": 177963 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "kas_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 160787 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "kas_Arab-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 55.81437125748503, + "max_sentence1_length": 203, + "unique_sentence1": 1502, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 164402 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mai_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162408, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162408 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mai_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 156812, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 156812 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mai_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 162868, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, 
+ "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 162868 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mai_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 167856, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 167856 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mai_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161540, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161540 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mai_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157106, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157106 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mai_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159093, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 159093 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mai_Deva-hin_Deva": { 
+ "num_samples": 1503, + "number_of_characters": 160788, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 160788 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mai_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166001, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166001 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mai_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165505, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165505 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mai_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173662 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mai_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 
54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163571 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mai_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158140, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158140 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mai_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mai_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165047 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mai_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161025 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mai_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, + "min_sentence1_length": 5, 
"average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 158917 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mai_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170203 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mai_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163447 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mai_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175690 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mai_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158514 + "max_sentence2_length": 182, + 
"unique_sentence2": 1495 }, "mai_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 54.3020625415835, + "max_sentence1_length": 230, + "unique_sentence1": 1499, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162129 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mal_Mlym-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 172838, + "unique_pairs": 1498, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 172838 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mal_Mlym-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 167242, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 167242 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mal_Mlym-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 173298, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 173298 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mal_Mlym-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 178286, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + 
"min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 178286 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mal_Mlym-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 171970, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 171970 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mal_Mlym-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 167536, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 167536 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mal_Mlym-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 169523, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 169523 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mal_Mlym-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 171218, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 171218 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mal_Mlym-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 176431, 
+ "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 176431 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mal_Mlym-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 175935, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 175935 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mal_Mlym-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 173662, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 173662 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mal_Mlym-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 174001 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mal_Mlym-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - 
"number_of_characters": 168570 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mal_Mlym-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 172160, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 172160 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mal_Mlym-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 175477 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mal_Mlym-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 171455 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mal_Mlym-san_Deva": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 169347 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mal_Mlym-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 180633, + "unique_pairs": 1501, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, 
+ "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 180633 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mal_Mlym-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 173877 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mal_Mlym-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 186120 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mal_Mlym-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 168944 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mal_Mlym-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, + "min_sentence1_length": 5, "average_sentence1_length": 61.24151696606786, + "max_sentence1_length": 219, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 172559 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, 
"mar_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162747, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162747 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mar_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157151, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 157151 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mar_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163207, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 163207 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mar_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168195, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 168195 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mar_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161879, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, 
"average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161879 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mar_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157445, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157445 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mar_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159432, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 159432 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mar_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161127, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 161127 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mar_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166340, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166340 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mar_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165844, + "unique_pairs": 1502, + 
"min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165844 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mar_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163571, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 163571 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mar_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 174001, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 174001 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mar_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158479 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "mar_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 162069 + 
"max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mar_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165386, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165386 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "mar_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161364 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mar_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 159256 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mar_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170542 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mar_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + 
"unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 163786 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mar_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 176029 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mar_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158853 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mar_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.52761144377911, + "max_sentence1_length": 221, + "unique_sentence1": 1501, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162468 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "mni_Mtei-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 157316, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 157316 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "mni_Mtei-ben_Beng": { + "num_samples": 1503, + 
"number_of_characters": 151720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 151720 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "mni_Mtei-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 157776, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 157776 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "mni_Mtei-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 162764, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 162764 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "mni_Mtei-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156448, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 156448 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "mni_Mtei-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152014, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - 
"num_samples": 1503, - "number_of_characters": 152014 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "mni_Mtei-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154001, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154001 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "mni_Mtei-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 155696, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 155696 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "mni_Mtei-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 160909, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 160909 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "mni_Mtei-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160413, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 160413 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "mni_Mtei-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158140, + "unique_pairs": 1501, + "min_sentence1_length": 4, 
"average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158140 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "mni_Mtei-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168570, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 168570 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "mni_Mtei-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158479, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 158479 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "mni_Mtei-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 156638 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "mni_Mtei-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 159955 + "max_sentence2_length": 195, + 
"unique_sentence2": 1500 }, "mni_Mtei-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 155933 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "mni_Mtei-san_Deva": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 153825 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "mni_Mtei-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165111 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "mni_Mtei-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 158355 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "mni_Mtei-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + 
"min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 170598 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "mni_Mtei-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 153422 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "mni_Mtei-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 50.91417165668663, + "max_sentence1_length": 239, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157037 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "npi_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160906, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160906 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "npi_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155310, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155310 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "npi_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 
161366, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161366 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "npi_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166354, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166354 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "npi_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160038, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160038 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "npi_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 155604, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 155604 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "npi_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157591, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - 
"number_of_characters": 157591 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "npi_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159286, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159286 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "npi_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164499, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164499 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "npi_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164003 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "npi_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161730, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 161730 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "npi_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172160, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, 
+ "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172160 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "npi_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162069, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162069 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "npi_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 156638, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 156638 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "npi_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163545 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "npi_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 159523 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, 
"npi_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157415 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "npi_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 168701 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "npi_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161945 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "npi_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174188 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "npi_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 6, 
"average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157012 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "npi_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.30272787757818, + "max_sentence1_length": 223, + "unique_sentence1": 1497, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 160627 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "ory_Orya-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 164223, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 164223 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "ory_Orya-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 158627, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 158627 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "ory_Orya-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 164683, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 164683 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "ory_Orya-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 169671, + "unique_pairs": 
1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 169671 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "ory_Orya-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 163355, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 163355 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "ory_Orya-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 158921, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 158921 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "ory_Orya-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 160908, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 160908 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "ory_Orya-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 162603, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 
162603 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "ory_Orya-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 167816, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 167816 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "ory_Orya-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 167320, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 167320 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "ory_Orya-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 165047, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 165047 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "ory_Orya-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 175477, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 175477 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "ory_Orya-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 165386, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + 
"max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 165386 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "ory_Orya-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 159955, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 159955 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "ory_Orya-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 163545, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 163545 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "ory_Orya-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 162840 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "ory_Orya-san_Deva": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 160732 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, 
"ory_Orya-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 172018 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "ory_Orya-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 165262 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "ory_Orya-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 177505 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "ory_Orya-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 160329 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "ory_Orya-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 55.509647371922824, + "max_sentence1_length": 195, + "unique_sentence1": 1500, + "min_sentence2_length": 
4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 163944 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "pan_Guru-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 160201, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 160201 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "pan_Guru-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 154605, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 154605 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "pan_Guru-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 160661, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 160661 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "pan_Guru-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 165649, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 165649 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "pan_Guru-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 159333, + "unique_pairs": 
1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 159333 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "pan_Guru-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 154899, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 154899 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "pan_Guru-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 156886, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 156886 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "pan_Guru-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 158581, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 158581 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "pan_Guru-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 163794, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 
163794 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "pan_Guru-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 163298, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 163298 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "pan_Guru-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 161025, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 161025 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "pan_Guru-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 171455, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 171455 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "pan_Guru-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 161364, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 161364 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "pan_Guru-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 155933, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + 
"max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 155933 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "pan_Guru-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 159523, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 159523 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "pan_Guru-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 162840, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 162840 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "pan_Guru-san_Deva": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 156710 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "pan_Guru-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 167996 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "pan_Guru-snd_Deva": 
{ + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 161240 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "pan_Guru-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 173483 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "pan_Guru-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 156307 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "pan_Guru-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 52.83366600133067, + "max_sentence1_length": 221, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 159922 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "san_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 158093, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, 
"average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 158093 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "san_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152497, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152497 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "san_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158553, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 158553 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "san_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163541, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163541 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "san_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 157225, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 157225 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "san_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152791, + "unique_pairs": 1503, + 
"min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152791 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "san_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154778, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154778 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "san_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156473, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156473 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "san_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161686, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161686 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "san_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 161190, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 161190 + 
"max_sentence2_length": 203, + "unique_sentence2": 1502 }, "san_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158917, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158917 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "san_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 169347, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 169347 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "san_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 159256, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 159256 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "san_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153825, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 153825 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "san_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 157415, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + 
"unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157415 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "san_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160732, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160732 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "san_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156710, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156710 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "san_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165888 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "san_Deva-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 159132 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "san_Deva-tam_Taml": { + "num_samples": 1503, + 
"number_of_characters": 171375, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 171375 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "san_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 154199 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "san_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 51.4311377245509, + "max_sentence1_length": 181, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157814 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "sat_Olck-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 169379, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 169379 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "sat_Olck-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 163783, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - 
"num_samples": 1503, - "number_of_characters": 163783 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "sat_Olck-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 169839, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 169839 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "sat_Olck-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 174827, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 174827 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "sat_Olck-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 168511, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 168511 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "sat_Olck-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 164077, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 164077 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "sat_Olck-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 166064, + "unique_pairs": 1503, + "min_sentence1_length": 7, 
"average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 166064 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "sat_Olck-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 167759, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 167759 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "sat_Olck-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 172972, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 172972 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "sat_Olck-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 172476, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 172476 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "sat_Olck-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 170203, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 170203 + "max_sentence2_length": 230, + 
"unique_sentence2": 1499 }, "sat_Olck-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 180633, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 180633 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "sat_Olck-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 170542, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 170542 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "sat_Olck-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 165111, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 165111 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "sat_Olck-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 168701, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 168701 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "sat_Olck-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 172018, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + 
"min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 172018 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "sat_Olck-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 167996, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 167996 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "sat_Olck-san_Deva": { + "num_samples": 1503, + "number_of_characters": 165888, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 165888 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "sat_Olck-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 170418 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "sat_Olck-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 182661 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "sat_Olck-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 
165485, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 165485 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "sat_Olck-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, + "min_sentence1_length": 7, "average_sentence1_length": 58.94011976047904, + "max_sentence1_length": 225, + "unique_sentence1": 1500, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 169100 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "snd_Deva-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 162623, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 162623 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "snd_Deva-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 157027, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 157027 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "snd_Deva-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 163083, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, 
- "number_of_characters": 163083 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "snd_Deva-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 168071, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 168071 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "snd_Deva-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 161755, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 161755 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "snd_Deva-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 157321, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 157321 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "snd_Deva-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 159308, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 159308 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "snd_Deva-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 161003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 
54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 161003 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "snd_Deva-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 166216, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 166216 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "snd_Deva-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 165720, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 165720 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "snd_Deva-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 163447, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 163447 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "snd_Deva-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 173877, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 173877 + "max_sentence2_length": 219, + "unique_sentence2": 1495 
}, "snd_Deva-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 163786, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 163786 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "snd_Deva-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 158355, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 158355 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "snd_Deva-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 161945, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 161945 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "snd_Deva-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 165262, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 165262 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "snd_Deva-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 161240, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + 
"min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 161240 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "snd_Deva-san_Deva": { + "num_samples": 1503, + "number_of_characters": 159132, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 159132 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "snd_Deva-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 170418, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 170418 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "snd_Deva-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 175905 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "snd_Deva-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "snd_Deva-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 
162344, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 54.445109780439125, + "max_sentence1_length": 195, + "unique_sentence1": 1490, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 162344 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "tam_Taml-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 174866, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 174866 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "tam_Taml-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 169270, + "unique_pairs": 1501, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 169270 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "tam_Taml-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 175326, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 175326 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "tam_Taml-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 180314, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 
1503, - "number_of_characters": 180314 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "tam_Taml-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 173998, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 173998 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "tam_Taml-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 169564, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 169564 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "tam_Taml-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 171551, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 171551 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "tam_Taml-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 173246, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 173246 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "tam_Taml-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 178459, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 
62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 178459 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "tam_Taml-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 177963, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 177963 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "tam_Taml-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 175690, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 175690 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "tam_Taml-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 186120, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 186120 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "tam_Taml-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 176029, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 176029 + "max_sentence2_length": 221, + "unique_sentence2": 1501 
}, "tam_Taml-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 170598, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 170598 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "tam_Taml-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 174188, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 174188 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "tam_Taml-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 177505, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 177505 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "tam_Taml-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 173483, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 173483 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "tam_Taml-san_Deva": { + "num_samples": 1503, + "number_of_characters": 171375, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + 
"min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 171375 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "tam_Taml-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 182661, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 182661 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "tam_Taml-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 175905, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 175905 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "tam_Taml-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 170972 + "max_sentence2_length": 182, + "unique_sentence2": 1495 }, "tam_Taml-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, + "min_sentence1_length": 3, "average_sentence1_length": 62.590818363273456, + "max_sentence1_length": 224, + "unique_sentence1": 1492, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 174587 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "tel_Telu-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 
157690, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 157690 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "tel_Telu-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 152094, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 152094 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "tel_Telu-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 158150, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 158150 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "tel_Telu-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 163138, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 163138 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "tel_Telu-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 156822, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - 
"number_of_characters": 156822 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "tel_Telu-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 152388, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 152388 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "tel_Telu-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 154375, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 154375 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "tel_Telu-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 156070, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 156070 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "tel_Telu-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 161283, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 161283 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "tel_Telu-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 160787, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, 
+ "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 160787 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "tel_Telu-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 158514, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 158514 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "tel_Telu-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 168944, + "unique_pairs": 1500, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 168944 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "tel_Telu-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 158853, + "unique_pairs": 1503, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 158853 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "tel_Telu-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 153422, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 153422 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "tel_Telu-npi_Deva": 
{ + "num_samples": 1503, + "number_of_characters": 157012, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 157012 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, "tel_Telu-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 160329, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 160329 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "tel_Telu-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 156307, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 156307 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "tel_Telu-san_Deva": { + "num_samples": 1503, + "number_of_characters": 154199, + "unique_pairs": 1501, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 154199 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "tel_Telu-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 165485, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 7, 
"average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 165485 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "tel_Telu-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 158729, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 158729 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "tel_Telu-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 170972, + "unique_pairs": 1502, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 170972 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "tel_Telu-urd_Arab": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, + "min_sentence1_length": 6, "average_sentence1_length": 51.16300731869594, + "max_sentence1_length": 182, + "unique_sentence1": 1495, + "min_sentence2_length": 4, "average_sentence2_length": 53.568196939454424, - "num_samples": 1503, - "number_of_characters": 157411 + "max_sentence2_length": 206, + "unique_sentence2": 1498 }, "urd_Arab-asm_Beng": { + "num_samples": 1503, + "number_of_characters": 161305, + "unique_pairs": 1498, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.753825681969396, - "num_samples": 1503, - "number_of_characters": 161305 + "max_sentence2_length": 208, + "unique_sentence2": 1497 }, "urd_Arab-ben_Beng": { + "num_samples": 1503, + "number_of_characters": 155709, + "unique_pairs": 
1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.03060545575516, - "num_samples": 1503, - "number_of_characters": 155709 + "max_sentence2_length": 178, + "unique_sentence2": 1497 }, "urd_Arab-brx_Deva": { + "num_samples": 1503, + "number_of_characters": 161765, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.05988023952096, - "num_samples": 1503, - "number_of_characters": 161765 + "max_sentence2_length": 210, + "unique_sentence2": 1498 }, "urd_Arab-doi_Deva": { + "num_samples": 1503, + "number_of_characters": 166753, + "unique_pairs": 1500, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 57.37857618097139, - "num_samples": 1503, - "number_of_characters": 166753 + "max_sentence2_length": 209, + "unique_sentence2": 1499 }, "urd_Arab-eng_Latn": { + "num_samples": 1503, + "number_of_characters": 160437, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.17631403858949, - "num_samples": 1503, - "number_of_characters": 160437 + "max_sentence2_length": 201, + "unique_sentence2": 1497 }, "urd_Arab-gom_Deva": { + "num_samples": 1503, + "number_of_characters": 156003, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 50.22621423819029, - "num_samples": 1503, - "number_of_characters": 
156003 + "max_sentence2_length": 203, + "unique_sentence2": 1500 }, "urd_Arab-guj_Gujr": { + "num_samples": 1503, + "number_of_characters": 157990, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 51.54823685961411, - "num_samples": 1503, - "number_of_characters": 157990 + "max_sentence2_length": 205, + "unique_sentence2": 1500 }, "urd_Arab-hin_Deva": { + "num_samples": 1503, + "number_of_characters": 159685, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.67598137059215, - "num_samples": 1503, - "number_of_characters": 159685 + "max_sentence2_length": 192, + "unique_sentence2": 1497 }, "urd_Arab-kan_Knda": { + "num_samples": 1503, + "number_of_characters": 164898, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 56.14437791084497, - "num_samples": 1503, - "number_of_characters": 164898 + "max_sentence2_length": 201, + "unique_sentence2": 1499 }, "urd_Arab-kas_Arab": { + "num_samples": 1503, + "number_of_characters": 164402, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 55.81437125748503, - "num_samples": 1503, - "number_of_characters": 164402 + "max_sentence2_length": 203, + "unique_sentence2": 1502 }, "urd_Arab-mai_Deva": { + "num_samples": 1503, + "number_of_characters": 162129, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + 
"max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 54.3020625415835, - "num_samples": 1503, - "number_of_characters": 162129 + "max_sentence2_length": 230, + "unique_sentence2": 1499 }, "urd_Arab-mal_Mlym": { + "num_samples": 1503, + "number_of_characters": 172559, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 5, "average_sentence2_length": 61.24151696606786, - "num_samples": 1503, - "number_of_characters": 172559 + "max_sentence2_length": 219, + "unique_sentence2": 1495 }, "urd_Arab-mar_Deva": { + "num_samples": 1503, + "number_of_characters": 162468, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 54.52761144377911, - "num_samples": 1503, - "number_of_characters": 162468 + "max_sentence2_length": 221, + "unique_sentence2": 1501 }, "urd_Arab-mni_Mtei": { + "num_samples": 1503, + "number_of_characters": 157037, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 50.91417165668663, - "num_samples": 1503, - "number_of_characters": 157037 + "max_sentence2_length": 239, + "unique_sentence2": 1498 }, "urd_Arab-npi_Deva": { + "num_samples": 1503, + "number_of_characters": 160627, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 53.30272787757818, - "num_samples": 1503, - "number_of_characters": 160627 + "max_sentence2_length": 223, + "unique_sentence2": 1497 }, 
"urd_Arab-ory_Orya": { + "num_samples": 1503, + "number_of_characters": 163944, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 55.509647371922824, - "num_samples": 1503, - "number_of_characters": 163944 + "max_sentence2_length": 195, + "unique_sentence2": 1500 }, "urd_Arab-pan_Guru": { + "num_samples": 1503, + "number_of_characters": 159922, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 4, "average_sentence2_length": 52.83366600133067, - "num_samples": 1503, - "number_of_characters": 159922 + "max_sentence2_length": 221, + "unique_sentence2": 1495 }, "urd_Arab-san_Deva": { + "num_samples": 1503, + "number_of_characters": 157814, + "unique_pairs": 1501, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 51.4311377245509, - "num_samples": 1503, - "number_of_characters": 157814 + "max_sentence2_length": 181, + "unique_sentence2": 1500 }, "urd_Arab-sat_Olck": { + "num_samples": 1503, + "number_of_characters": 169100, + "unique_pairs": 1502, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 7, "average_sentence2_length": 58.94011976047904, - "num_samples": 1503, - "number_of_characters": 169100 + "max_sentence2_length": 225, + "unique_sentence2": 1500 }, "urd_Arab-snd_Deva": { + "num_samples": 1503, + "number_of_characters": 162344, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 
4, "average_sentence2_length": 54.445109780439125, - "num_samples": 1503, - "number_of_characters": 162344 + "max_sentence2_length": 195, + "unique_sentence2": 1490 }, "urd_Arab-tam_Taml": { + "num_samples": 1503, + "number_of_characters": 174587, + "unique_pairs": 1503, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 3, "average_sentence2_length": 62.590818363273456, - "num_samples": 1503, - "number_of_characters": 174587 + "max_sentence2_length": 224, + "unique_sentence2": 1492 }, "urd_Arab-tel_Telu": { + "num_samples": 1503, + "number_of_characters": 157411, + "unique_pairs": 1499, + "min_sentence1_length": 4, "average_sentence1_length": 53.568196939454424, + "max_sentence1_length": 206, + "unique_sentence1": 1498, + "min_sentence2_length": 6, "average_sentence2_length": 51.16300731869594, - "num_samples": 1503, - "number_of_characters": 157411 + "max_sentence2_length": 182, + "unique_sentence2": 1495 } } } diff --git a/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json new file mode 100644 index 0000000000..c53818c9ca --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IN22GenBitextMining.json @@ -0,0 +1,6595 @@ +{ + "test": { + "num_samples": 518144, + "number_of_characters": 162367876, + "unique_pairs": 518101, + "min_sentence1_length": 9, + "average_sentence1_length": 156.6821925951087, + "max_sentence1_length": 692, + "unique_sentence1": 23550, + "min_sentence2_length": 9, + "average_sentence2_length": 156.6821925951087, + "max_sentence2_length": 692, + "unique_sentence2": 23550, + "hf_subset_descriptive_stats": { + "asm_Beng-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + 
"min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "asm_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "asm_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "asm_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "asm_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "asm_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 309440, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 
488, + "unique_sentence2": 1024 + }, + "asm_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "asm_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "asm_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "asm_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "asm_Beng-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "asm_Beng-mar_Deva": { + "num_samples": 1024, + 
"number_of_characters": 321784, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "asm_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "asm_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "asm_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "asm_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "asm_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318079, + "unique_pairs": 1024, + "min_sentence1_length": 13, + 
"average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "asm_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "asm_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "asm_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "asm_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "asm_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 156.6982421875, + "max_sentence1_length": 582, + "unique_sentence1": 1024, + 
"min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "ben_Beng-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 310622, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "ben_Beng-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "ben_Beng-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "ben_Beng-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "ben_Beng-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 302298, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, 
+ "unique_sentence2": 1024 + }, + "ben_Beng-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "ben_Beng-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "ben_Beng-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "ben_Beng-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "ben_Beng-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "ben_Beng-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 
333340, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "ben_Beng-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "ben_Beng-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "ben_Beng-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "ben_Beng-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "ben_Beng-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 296567, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + 
"max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "ben_Beng-san_Deva": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "ben_Beng-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "ben_Beng-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "ben_Beng-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "ben_Beng-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "ben_Beng-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 146.6435546875, + "max_sentence1_length": 538, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "brx_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 323609, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "brx_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 313313, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "brx_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "brx_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 322789, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, 
+ "brx_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "brx_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "brx_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "brx_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "brx_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "brx_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 311373, + "unique_pairs": 1024, 
+ "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "brx_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "brx_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "brx_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "brx_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "brx_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336917, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + 
"unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "brx_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "brx_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "brx_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "brx_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "brx_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + 
"max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "brx_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "brx_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 159.326171875, + "max_sentence1_length": 631, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "doi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319020, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "doi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308724, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "doi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321711, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "doi_Deva-eng_Latn": { + "num_samples": 
1024, + "number_of_characters": 318200, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "doi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "doi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "doi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "doi_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "doi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320866, + "unique_pairs": 1024, + "min_sentence1_length": 14, + 
"average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "doi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "doi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "doi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "doi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "doi_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 
1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "doi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "doi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "doi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "doi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "doi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, 
+ "unique_sentence2": 1024 + }, + "doi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "doi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "doi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.8447265625, + "max_sentence1_length": 500, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "eng_Latn-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320098, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "eng_Latn-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309802, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "eng_Latn-brx_Deva": { + "num_samples": 1024, + 
"number_of_characters": 322789, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "eng_Latn-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318200, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "eng_Latn-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "eng_Latn-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "eng_Latn-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331244, + "unique_pairs": 1024, + "min_sentence1_length": 17, + 
"average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "eng_Latn-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "eng_Latn-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "eng_Latn-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "eng_Latn-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 
1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "eng_Latn-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "eng_Latn-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "eng_Latn-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "eng_Latn-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "eng_Latn-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 
536, + "unique_sentence2": 1024 + }, + "eng_Latn-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "eng_Latn-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "eng_Latn-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "eng_Latn-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 155.8974609375, + "max_sentence1_length": 532, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "gom_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 312594, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "gom_Deva-ben_Beng": { + "num_samples": 1024, + 
"number_of_characters": 302298, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "gom_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315285, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "gom_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 310696, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "gom_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 311774, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "gom_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301116, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "gom_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + "unique_pairs": 1024, + "min_sentence1_length": 17, + 
"average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "gom_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "gom_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "gom_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "gom_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "gom_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 
1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "gom_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "gom_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "gom_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "gom_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "gom_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 
601, + "unique_sentence2": 1024 + }, + "gom_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "gom_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "gom_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "gom_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "gom_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, + "min_sentence1_length": 17, + "average_sentence1_length": 148.5693359375, + "max_sentence1_length": 537, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "guj_Gujr-asm_Beng": { + "num_samples": 1024, + 
"number_of_characters": 309440, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "guj_Gujr-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 299144, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "guj_Gujr-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 312131, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "guj_Gujr-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 307542, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 308620, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "guj_Gujr-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 301116, + "unique_pairs": 1024, + "min_sentence1_length": 14, + 
"average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "guj_Gujr-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "guj_Gujr-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "guj_Gujr-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "guj_Gujr-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "guj_Gujr-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 
1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "guj_Gujr-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "guj_Gujr-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "guj_Gujr-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "guj_Gujr-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "guj_Gujr-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 
476, + "unique_sentence2": 1024 + }, + "guj_Gujr-san_Deva": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "guj_Gujr-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "guj_Gujr-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "guj_Gujr-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "guj_Gujr-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "guj_Gujr-urd_Arab": { + "num_samples": 1024, + 
"number_of_characters": 303656, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 145.4892578125, + "max_sentence1_length": 488, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "hin_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320106, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "hin_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 309810, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "hin_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 322797, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "hin_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318208, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "hin_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319286, + "unique_pairs": 1024, + "min_sentence1_length": 21, + 
"average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "hin_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 311782, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "hin_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308628, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "hin_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "hin_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "hin_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 
1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "hin_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "hin_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "hin_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "hin_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "hin_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + 
"max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "hin_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "hin_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "hin_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "hin_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "hin_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "hin_Deva-tel_Telu": { + "num_samples": 
1024, + "number_of_characters": 318233, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "hin_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, + "min_sentence1_length": 21, + "average_sentence1_length": 155.9052734375, + "max_sentence1_length": 531, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "kan_Knda-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 332064, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "kan_Knda-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 321768, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "kan_Knda-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 334755, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "kan_Knda-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 330166, + "unique_pairs": 1024, + "min_sentence1_length": 9, + 
"average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "kan_Knda-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 331244, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "kan_Knda-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 323740, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "kan_Knda-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 320586, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "kan_Knda-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 331252, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "kan_Knda-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + 
"min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "kan_Knda-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "kan_Knda-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "kan_Knda-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "kan_Knda-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "kan_Knda-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 324565, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + 
"unique_sentence2": 1024 + }, + "kan_Knda-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "kan_Knda-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "kan_Knda-san_Deva": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "kan_Knda-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "kan_Knda-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "kan_Knda-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 
359492, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "kan_Knda-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "kan_Knda-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 167.5830078125, + "max_sentence1_length": 668, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "kas_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 322764, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "kas_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 312468, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "kas_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 325455, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 
158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "kas_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 320866, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "kas_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 321944, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "kas_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 314440, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "kas_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 311286, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "kas_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 321952, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 
21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "kas_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 333910, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "kas_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "kas_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "kas_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "kas_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + 
"unique_sentence2": 1024 + }, + "kas_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "kas_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "kas_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "kas_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "kas_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "kas_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 322267, 
+ "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "kas_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "kas_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "kas_Arab-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 158.5009765625, + "max_sentence1_length": 520, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mai_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 308682, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mai_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 298386, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + 
"max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mai_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 311373, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mai_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 306784, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mai_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 307862, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mai_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 300358, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mai_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 297204, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mai_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 307870, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mai_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 319828, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mai_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 310528, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mai_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mai_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 
1024 + }, + "mai_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mai_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mai_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mai_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mai_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mai_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 
1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mai_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mai_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mai_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mai_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 144.7490234375, + "max_sentence1_length": 562, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mal_Mlym-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 343636, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + 
"max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mal_Mlym-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 333340, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mal_Mlym-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 346327, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mal_Mlym-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 341738, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 342816, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mal_Mlym-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 335312, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + 
"average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mal_Mlym-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 332158, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mal_Mlym-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 342824, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mal_Mlym-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 354782, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mal_Mlym-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 345482, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mal_Mlym-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 331400, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 
1024 + }, + "mal_Mlym-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "mal_Mlym-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mal_Mlym-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mal_Mlym-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mal_Mlym-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mal_Mlym-san_Deva": { + "num_samples": 1024, + "number_of_characters": 340797, + 
"unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mal_Mlym-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mal_Mlym-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mal_Mlym-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mal_Mlym-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + "max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mal_Mlym-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 337852, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 178.8837890625, + 
"max_sentence1_length": 692, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mar_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 321784, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mar_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 311488, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mar_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 324475, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mar_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 319886, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mar_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 320964, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + 
"average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mar_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 313460, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mar_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 310306, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mar_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 320972, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mar_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 332930, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "mar_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 323630, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 
1024 + }, + "mar_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 309548, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "mar_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 344502, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mar_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "mar_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mar_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mar_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 307729, + 
"unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mar_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mar_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mar_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mar_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mar_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 319911, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + 
"max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mar_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 157.5439453125, + "max_sentence1_length": 555, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "mni_Mtei-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313134, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "mni_Mtei-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 302838, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "mni_Mtei-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 315825, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "mni_Mtei-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311236, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "mni_Mtei-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312314, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "mni_Mtei-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 304810, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "mni_Mtei-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301656, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "mni_Mtei-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312322, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "mni_Mtei-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 324280, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 
1024 + }, + "mni_Mtei-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 314980, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "mni_Mtei-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 300898, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "mni_Mtei-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 335852, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "mni_Mtei-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314000, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "mni_Mtei-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "mni_Mtei-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 326442, + 
"unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "mni_Mtei-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "mni_Mtei-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "mni_Mtei-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "mni_Mtei-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "mni_Mtei-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340562, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + 
"max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "mni_Mtei-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "mni_Mtei-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, + "min_sentence1_length": 16, + "average_sentence1_length": 149.0966796875, + "max_sentence1_length": 597, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "npi_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 313419, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "npi_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 303123, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "npi_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 316110, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 
159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "npi_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 311521, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "npi_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 312599, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "npi_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 305095, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "npi_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 301941, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "npi_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 312607, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "npi_Deva-kan_Knda": { + "num_samples": 1024, + 
"number_of_characters": 324565, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "npi_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 315265, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "npi_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 301183, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "npi_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 336137, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "npi_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 314285, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "npi_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 305635, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + 
"max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "npi_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "npi_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "npi_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "npi_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "npi_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 312922, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + 
"max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "npi_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "npi_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "npi_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 149.375, + "max_sentence1_length": 525, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "ory_Orya-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 334226, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "ory_Orya-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 323930, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "ory_Orya-brx_Deva": { + "num_samples": 1024, + 
"number_of_characters": 336917, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "ory_Orya-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 332328, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "ory_Orya-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 333406, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "ory_Orya-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 325902, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "ory_Orya-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 322748, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "ory_Orya-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 333414, + "unique_pairs": 1024, + "min_sentence1_length": 10, + 
"average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "ory_Orya-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 345372, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "ory_Orya-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 336072, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "ory_Orya-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 321990, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "ory_Orya-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 356944, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "ory_Orya-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 335092, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 
1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "ory_Orya-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 326442, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "ory_Orya-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 326727, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "ory_Orya-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "ory_Orya-san_Deva": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "ory_Orya-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 
536, + "unique_sentence2": 1024 + }, + "ory_Orya-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "ory_Orya-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "ory_Orya-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "ory_Orya-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, + "min_sentence1_length": 10, + "average_sentence1_length": 169.6943359375, + "max_sentence1_length": 578, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "pan_Guru-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 306863, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "pan_Guru-ben_Beng": { + "num_samples": 1024, + 
"number_of_characters": 296567, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "pan_Guru-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 309554, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "pan_Guru-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 304965, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "pan_Guru-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 306043, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "pan_Guru-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 298539, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "pan_Guru-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 295385, + "unique_pairs": 1024, + "min_sentence1_length": 19, + 
"average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "pan_Guru-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 306051, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "pan_Guru-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 318009, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "pan_Guru-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 308709, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "pan_Guru-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 294627, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "pan_Guru-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 329581, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + 
"min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "pan_Guru-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 307729, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "pan_Guru-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 299079, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "pan_Guru-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 299364, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "pan_Guru-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 320171, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "pan_Guru-san_Deva": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + 
"unique_sentence2": 1024 + }, + "pan_Guru-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "pan_Guru-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "pan_Guru-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "pan_Guru-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "pan_Guru-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, + "min_sentence1_length": 19, + "average_sentence1_length": 142.97265625, + "max_sentence1_length": 476, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "san_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 318079, 
+ "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "san_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 307783, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "san_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 320770, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "san_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 316181, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "san_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 317259, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "san_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 309755, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + 
"max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "san_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 306601, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "san_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 317267, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "san_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 329225, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "san_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 319925, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "san_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 305843, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "san_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 340797, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "san_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 318945, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "san_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 310295, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "san_Deva-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 310580, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "san_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 331387, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + 
"san_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304024, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "san_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "san_Deva-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "san_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "san_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, + "min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "san_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, + 
"min_sentence1_length": 9, + "average_sentence1_length": 153.92578125, + "max_sentence1_length": 601, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "sat_Olck-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 326732, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "sat_Olck-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 316436, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "sat_Olck-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 329423, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "sat_Olck-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 324834, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "sat_Olck-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 325912, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, 
+ "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "sat_Olck-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 318408, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "sat_Olck-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 315254, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "sat_Olck-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 325920, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "sat_Olck-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 337878, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "sat_Olck-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 328578, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 
158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "sat_Olck-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 314496, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "sat_Olck-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 349450, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "sat_Olck-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 327598, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "sat_Olck-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 318948, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "sat_Olck-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 319233, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "sat_Olck-ory_Orya": { + 
"num_samples": 1024, + "number_of_characters": 340040, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "sat_Olck-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 312677, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "sat_Olck-san_Deva": { + "num_samples": 1024, + "number_of_characters": 323893, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "sat_Olck-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "sat_Olck-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "sat_Olck-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, + "min_sentence1_length": 
11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "sat_Olck-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, + "min_sentence1_length": 11, + "average_sentence1_length": 162.3759765625, + "max_sentence1_length": 536, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "snd_Deva-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 320421, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "snd_Deva-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 310125, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "snd_Deva-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 323112, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "snd_Deva-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 318523, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 
1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "snd_Deva-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 319601, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "snd_Deva-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 312097, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "snd_Deva-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 308943, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "snd_Deva-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 319609, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "snd_Deva-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 331567, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + 
"max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "snd_Deva-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 322267, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "snd_Deva-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 308185, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "snd_Deva-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 343139, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "snd_Deva-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 321287, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "snd_Deva-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 312637, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "snd_Deva-npi_Deva": { + "num_samples": 
1024, + "number_of_characters": 312922, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "snd_Deva-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 333729, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "snd_Deva-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 306366, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "snd_Deva-san_Deva": { + "num_samples": 1024, + "number_of_characters": 317582, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "snd_Deva-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 326235, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "snd_Deva-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, + "min_sentence1_length": 18, + 
"average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "snd_Deva-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "snd_Deva-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, + "min_sentence1_length": 18, + "average_sentence1_length": 156.212890625, + "max_sentence1_length": 545, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "tam_Taml-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 348346, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "tam_Taml-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 338050, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "tam_Taml-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 351037, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + 
"min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "tam_Taml-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 346448, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "tam_Taml-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 347526, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "tam_Taml-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 340022, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "tam_Taml-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 336868, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "tam_Taml-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 347534, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 
531, + "unique_sentence2": 1024 + }, + "tam_Taml-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 359492, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "tam_Taml-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 350192, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "tam_Taml-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 336110, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "tam_Taml-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 371064, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "tam_Taml-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 349212, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "tam_Taml-mni_Mtei": { + "num_samples": 1024, + 
"number_of_characters": 340562, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "tam_Taml-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 340847, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "tam_Taml-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 361654, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "tam_Taml-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 334291, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "tam_Taml-san_Deva": { + "num_samples": 1024, + "number_of_characters": 345507, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "tam_Taml-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 354160, + "unique_pairs": 1024, + "min_sentence1_length": 32, + 
"average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "tam_Taml-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 347849, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "tam_Taml-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 14, + "average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + }, + "tam_Taml-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, + "min_sentence1_length": 32, + "average_sentence1_length": 183.4833984375, + "max_sentence1_length": 614, + "unique_sentence1": 1023, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "tel_Telu-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 319045, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "tel_Telu-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 308749, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, 
+ "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "tel_Telu-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 321736, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "tel_Telu-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 317147, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "tel_Telu-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 318225, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "tel_Telu-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 310721, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 + }, + "tel_Telu-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 307567, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, 
+ "unique_sentence2": 1024 + }, + "tel_Telu-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 318233, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "tel_Telu-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 330191, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "tel_Telu-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 320891, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "tel_Telu-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 306809, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "tel_Telu-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 341763, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "tel_Telu-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 
319911, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "tel_Telu-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 311261, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "tel_Telu-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 311546, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "tel_Telu-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 332353, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "tel_Telu-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 304990, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "tel_Telu-san_Deva": { + "num_samples": 1024, + "number_of_characters": 316206, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + 
"max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "tel_Telu-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 324859, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "tel_Telu-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 318548, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "tel_Telu-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 346473, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "tel_Telu-urd_Arab": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, + "min_sentence1_length": 14, + "average_sentence1_length": 154.869140625, + "max_sentence1_length": 658, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 151.0498046875, + "max_sentence2_length": 574, + "unique_sentence2": 1024 + }, + "urd_Arab-asm_Beng": { + "num_samples": 1024, + "number_of_characters": 315134, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + 
"average_sentence2_length": 156.6982421875, + "max_sentence2_length": 582, + "unique_sentence2": 1024 + }, + "urd_Arab-ben_Beng": { + "num_samples": 1024, + "number_of_characters": 304838, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 146.6435546875, + "max_sentence2_length": 538, + "unique_sentence2": 1024 + }, + "urd_Arab-brx_Deva": { + "num_samples": 1024, + "number_of_characters": 317825, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 159.326171875, + "max_sentence2_length": 631, + "unique_sentence2": 1024 + }, + "urd_Arab-doi_Deva": { + "num_samples": 1024, + "number_of_characters": 313236, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 154.8447265625, + "max_sentence2_length": 500, + "unique_sentence2": 1024 + }, + "urd_Arab-eng_Latn": { + "num_samples": 1024, + "number_of_characters": 314314, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 155.8974609375, + "max_sentence2_length": 532, + "unique_sentence2": 1024 + }, + "urd_Arab-gom_Deva": { + "num_samples": 1024, + "number_of_characters": 306810, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 17, + "average_sentence2_length": 148.5693359375, + "max_sentence2_length": 537, + "unique_sentence2": 1024 
+ }, + "urd_Arab-guj_Gujr": { + "num_samples": 1024, + "number_of_characters": 303656, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 145.4892578125, + "max_sentence2_length": 488, + "unique_sentence2": 1024 + }, + "urd_Arab-hin_Deva": { + "num_samples": 1024, + "number_of_characters": 314322, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 21, + "average_sentence2_length": 155.9052734375, + "max_sentence2_length": 531, + "unique_sentence2": 1024 + }, + "urd_Arab-kan_Knda": { + "num_samples": 1024, + "number_of_characters": 326280, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 167.5830078125, + "max_sentence2_length": 668, + "unique_sentence2": 1024 + }, + "urd_Arab-kas_Arab": { + "num_samples": 1024, + "number_of_characters": 316980, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 158.5009765625, + "max_sentence2_length": 520, + "unique_sentence2": 1024 + }, + "urd_Arab-mai_Deva": { + "num_samples": 1024, + "number_of_characters": 302898, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + "average_sentence2_length": 144.7490234375, + "max_sentence2_length": 562, + "unique_sentence2": 1024 + }, + "urd_Arab-mal_Mlym": { + "num_samples": 1024, + "number_of_characters": 337852, + 
"unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 13, + "average_sentence2_length": 178.8837890625, + "max_sentence2_length": 692, + "unique_sentence2": 1024 + }, + "urd_Arab-mar_Deva": { + "num_samples": 1024, + "number_of_characters": 316000, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 157.5439453125, + "max_sentence2_length": 555, + "unique_sentence2": 1024 + }, + "urd_Arab-mni_Mtei": { + "num_samples": 1024, + "number_of_characters": 307350, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 16, + "average_sentence2_length": 149.0966796875, + "max_sentence2_length": 597, + "unique_sentence2": 1024 + }, + "urd_Arab-npi_Deva": { + "num_samples": 1024, + "number_of_characters": 307635, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 149.375, + "max_sentence2_length": 525, + "unique_sentence2": 1024 + }, + "urd_Arab-ory_Orya": { + "num_samples": 1024, + "number_of_characters": 328442, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 10, + "average_sentence2_length": 169.6943359375, + "max_sentence2_length": 578, + "unique_sentence2": 1024 + }, + "urd_Arab-pan_Guru": { + "num_samples": 1024, + "number_of_characters": 301079, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + 
"max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 19, + "average_sentence2_length": 142.97265625, + "max_sentence2_length": 476, + "unique_sentence2": 1024 + }, + "urd_Arab-san_Deva": { + "num_samples": 1024, + "number_of_characters": 312295, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 9, + "average_sentence2_length": 153.92578125, + "max_sentence2_length": 601, + "unique_sentence2": 1024 + }, + "urd_Arab-sat_Olck": { + "num_samples": 1024, + "number_of_characters": 320948, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 11, + "average_sentence2_length": 162.3759765625, + "max_sentence2_length": 536, + "unique_sentence2": 1024 + }, + "urd_Arab-snd_Deva": { + "num_samples": 1024, + "number_of_characters": 314637, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 18, + "average_sentence2_length": 156.212890625, + "max_sentence2_length": 545, + "unique_sentence2": 1024 + }, + "urd_Arab-tam_Taml": { + "num_samples": 1024, + "number_of_characters": 342562, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 32, + "average_sentence2_length": 183.4833984375, + "max_sentence2_length": 614, + "unique_sentence2": 1023 + }, + "urd_Arab-tel_Telu": { + "num_samples": 1024, + "number_of_characters": 313261, + "unique_pairs": 1024, + "min_sentence1_length": 13, + "average_sentence1_length": 151.0498046875, + "max_sentence1_length": 574, + "unique_sentence1": 1024, + "min_sentence2_length": 14, + 
"average_sentence2_length": 154.869140625, + "max_sentence2_length": 658, + "unique_sentence2": 1024 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json new file mode 100644 index 0000000000..504c3f1905 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IWSLT2017BitextMining.json @@ -0,0 +1,329 @@ +{ + "validation": { + "num_samples": 21938, + "number_of_characters": 4256244, + "unique_pairs": 21840, + "min_sentence1_length": 2, + "average_sentence1_length": 97.0061992889051, + "max_sentence1_length": 521, + "unique_sentence1": 11563, + "min_sentence2_length": 2, + "average_sentence2_length": 97.0061992889051, + "max_sentence2_length": 521, + "unique_sentence2": 11563, + "hf_subset_descriptive_stats": { + "ar-en": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, + "min_sentence1_length": 4, + "average_sentence1_length": 85.48873873873873, + "max_sentence1_length": 369, + "unique_sentence1": 887, + "min_sentence2_length": 10, + "average_sentence2_length": 108.76689189189189, + "max_sentence2_length": 462, + "unique_sentence2": 881 + }, + "de-en": { + "num_samples": 888, + "number_of_characters": 202336, + "unique_pairs": 883, + "min_sentence1_length": 6, + "average_sentence1_length": 119.02702702702703, + "max_sentence1_length": 521, + "unique_sentence1": 881, + "min_sentence2_length": 10, + "average_sentence2_length": 108.82882882882883, + "max_sentence2_length": 462, + "unique_sentence2": 881 + }, + "en-ar": { + "num_samples": 888, + "number_of_characters": 172499, + "unique_pairs": 887, + "min_sentence1_length": 10, + "average_sentence1_length": 108.76689189189189, + "max_sentence1_length": 462, + "unique_sentence1": 881, + "min_sentence2_length": 4, + "average_sentence2_length": 85.48873873873873, + "max_sentence2_length": 369, + "unique_sentence2": 887 + }, + "en-de": { + "num_samples": 888, + 
"number_of_characters": 202336, + "unique_pairs": 883, + "min_sentence1_length": 10, + "average_sentence1_length": 108.82882882882883, + "max_sentence1_length": 462, + "unique_sentence1": 881, + "min_sentence2_length": 6, + "average_sentence2_length": 119.02702702702703, + "max_sentence2_length": 521, + "unique_sentence2": 881 + }, + "en-fr": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, + "min_sentence1_length": 10, + "average_sentence1_length": 108.4123595505618, + "max_sentence1_length": 462, + "unique_sentence1": 883, + "min_sentence2_length": 6, + "average_sentence2_length": 113.63146067415731, + "max_sentence2_length": 493, + "unique_sentence2": 881 + }, + "en-it": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, + "min_sentence1_length": 10, + "average_sentence1_length": 103.0010764262648, + "max_sentence1_length": 433, + "unique_sentence1": 922, + "min_sentence2_length": 7, + "average_sentence2_length": 103.46071044133477, + "max_sentence2_length": 444, + "unique_sentence2": 918 + }, + "en-ja": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, + "min_sentence1_length": 10, + "average_sentence1_length": 109.80826636050517, + "max_sentence1_length": 462, + "unique_sentence1": 864, + "min_sentence2_length": 5, + "average_sentence2_length": 42.59357060849598, + "max_sentence2_length": 225, + "unique_sentence2": 866 + }, + "en-ko": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, + "min_sentence1_length": 10, + "average_sentence1_length": 107.74175199089875, + "max_sentence1_length": 462, + "unique_sentence1": 872, + "min_sentence2_length": 3, + "average_sentence2_length": 54.5551763367463, + "max_sentence2_length": 250, + "unique_sentence2": 872 + }, + "en-nl": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, + "min_sentence1_length": 10, + "average_sentence1_length": 95.26819541375872, + 
"max_sentence1_length": 433, + "unique_sentence1": 996, + "min_sentence2_length": 4, + "average_sentence2_length": 93.80159521435692, + "max_sentence2_length": 477, + "unique_sentence2": 1000 + }, + "en-ro": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, + "min_sentence1_length": 10, + "average_sentence1_length": 104.72100656455142, + "max_sentence1_length": 433, + "unique_sentence1": 907, + "min_sentence2_length": 9, + "average_sentence2_length": 107.67286652078775, + "max_sentence2_length": 448, + "unique_sentence2": 910 + }, + "en-zh": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, + "min_sentence1_length": 10, + "average_sentence1_length": 109.36518771331058, + "max_sentence1_length": 462, + "unique_sentence1": 872, + "min_sentence2_length": 2, + "average_sentence2_length": 39.811149032992034, + "max_sentence2_length": 230, + "unique_sentence2": 867 + }, + "fr-en": { + "num_samples": 890, + "number_of_characters": 197619, + "unique_pairs": 883, + "min_sentence1_length": 6, + "average_sentence1_length": 113.63146067415731, + "max_sentence1_length": 493, + "unique_sentence1": 881, + "min_sentence2_length": 10, + "average_sentence2_length": 108.4123595505618, + "max_sentence2_length": 462, + "unique_sentence2": 883 + }, + "it-en": { + "num_samples": 929, + "number_of_characters": 191803, + "unique_pairs": 924, + "min_sentence1_length": 7, + "average_sentence1_length": 103.46071044133477, + "max_sentence1_length": 444, + "unique_sentence1": 918, + "min_sentence2_length": 10, + "average_sentence2_length": 103.0010764262648, + "max_sentence2_length": 433, + "unique_sentence2": 922 + }, + "it-nl": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, + "min_sentence1_length": 7, + "average_sentence1_length": 94.64235764235764, + "max_sentence1_length": 459, + "unique_sentence1": 994, + "min_sentence2_length": 7, + "average_sentence2_length": 94.02697302697302, + 
"max_sentence2_length": 505, + "unique_sentence2": 998 + }, + "it-ro": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, + "min_sentence1_length": 7, + "average_sentence1_length": 103.90809628008753, + "max_sentence1_length": 435, + "unique_sentence1": 907, + "min_sentence2_length": 9, + "average_sentence2_length": 107.62253829321664, + "max_sentence2_length": 448, + "unique_sentence2": 910 + }, + "ja-en": { + "num_samples": 871, + "number_of_characters": 132742, + "unique_pairs": 867, + "min_sentence1_length": 5, + "average_sentence1_length": 42.59357060849598, + "max_sentence1_length": 225, + "unique_sentence1": 866, + "min_sentence2_length": 10, + "average_sentence2_length": 109.80826636050517, + "max_sentence2_length": 462, + "unique_sentence2": 864 + }, + "ko-en": { + "num_samples": 879, + "number_of_characters": 142659, + "unique_pairs": 874, + "min_sentence1_length": 3, + "average_sentence1_length": 54.5551763367463, + "max_sentence1_length": 250, + "unique_sentence1": 872, + "min_sentence2_length": 10, + "average_sentence2_length": 107.74175199089875, + "max_sentence2_length": 462, + "unique_sentence2": 872 + }, + "nl-en": { + "num_samples": 1003, + "number_of_characters": 189637, + "unique_pairs": 1000, + "min_sentence1_length": 4, + "average_sentence1_length": 93.80159521435692, + "max_sentence1_length": 477, + "unique_sentence1": 1000, + "min_sentence2_length": 10, + "average_sentence2_length": 95.26819541375872, + "max_sentence2_length": 433, + "unique_sentence2": 996 + }, + "nl-it": { + "num_samples": 1001, + "number_of_characters": 188858, + "unique_pairs": 998, + "min_sentence1_length": 7, + "average_sentence1_length": 94.02697302697302, + "max_sentence1_length": 505, + "unique_sentence1": 998, + "min_sentence2_length": 7, + "average_sentence2_length": 94.64235764235764, + "max_sentence2_length": 459, + "unique_sentence2": 994 + }, + "nl-ro": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 
911, + "min_sentence1_length": 7, + "average_sentence1_length": 102.01971522453451, + "max_sentence1_length": 478, + "unique_sentence1": 909, + "min_sentence2_length": 9, + "average_sentence2_length": 107.59255202628697, + "max_sentence2_length": 515, + "unique_sentence2": 909 + }, + "ro-en": { + "num_samples": 914, + "number_of_characters": 194128, + "unique_pairs": 910, + "min_sentence1_length": 9, + "average_sentence1_length": 107.67286652078775, + "max_sentence1_length": 448, + "unique_sentence1": 910, + "min_sentence2_length": 10, + "average_sentence2_length": 104.72100656455142, + "max_sentence2_length": 433, + "unique_sentence2": 907 + }, + "ro-it": { + "num_samples": 914, + "number_of_characters": 193339, + "unique_pairs": 911, + "min_sentence1_length": 9, + "average_sentence1_length": 107.62253829321664, + "max_sentence1_length": 448, + "unique_sentence1": 910, + "min_sentence2_length": 7, + "average_sentence2_length": 103.90809628008753, + "max_sentence2_length": 435, + "unique_sentence2": 907 + }, + "ro-nl": { + "num_samples": 913, + "number_of_characters": 191376, + "unique_pairs": 911, + "min_sentence1_length": 9, + "average_sentence1_length": 107.59255202628697, + "max_sentence1_length": 515, + "unique_sentence1": 909, + "min_sentence2_length": 7, + "average_sentence2_length": 102.01971522453451, + "max_sentence2_length": 478, + "unique_sentence2": 909 + }, + "zh-en": { + "num_samples": 879, + "number_of_characters": 131126, + "unique_pairs": 877, + "min_sentence1_length": 2, + "average_sentence1_length": 39.811149032992034, + "max_sentence1_length": 230, + "unique_sentence1": 867, + "min_sentence2_length": 10, + "average_sentence2_length": 109.36518771331058, + "max_sentence2_length": 462, + "unique_sentence2": 872 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json new file mode 100644 index 
0000000000..1aaed39454 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/IndicGenBenchFloresBitextMining.json @@ -0,0 +1,1540 @@ +{ + "validation": { + "num_samples": 57826, + "number_of_characters": 14600950, + "unique_pairs": 57826, + "min_sentence1_length": 24, + "average_sentence1_length": 126.2541071490333, + "max_sentence1_length": 368, + "unique_sentence1": 29903, + "min_sentence2_length": 24, + "average_sentence2_length": 126.24390412617161, + "max_sentence2_length": 368, + "unique_sentence2": 29903, + "hf_subset_descriptive_stats": { + "ben-eng": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 123.64593781344033, + "max_sentence1_length": 320, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-ben": { + "num_samples": 997, + "number_of_characters": 248469, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 123.64593781344033, + "max_sentence2_length": 320, + "unique_sentence2": 997 + }, + "guj-eng": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 120.64493480441324, + "max_sentence1_length": 368, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-guj": { + "num_samples": 997, + "number_of_characters": 245477, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 120.64493480441324, + 
"max_sentence2_length": 368, + "unique_sentence2": 997 + }, + "hin-eng": { + "num_samples": 997, + "number_of_characters": 250573, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 125.75626880641926, + "max_sentence1_length": 355, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-hin": { + "num_samples": 997, + "number_of_characters": 250564, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 125.74724172517553, + "max_sentence2_length": 355, + "unique_sentence2": 997 + }, + "kan-eng": { + "num_samples": 997, + "number_of_characters": 257131, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 132.33400200601807, + "max_sentence1_length": 331, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-kan": { + "num_samples": 997, + "number_of_characters": 256986, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 132.18856569709126, + "max_sentence2_length": 331, + "unique_sentence2": 997 + }, + "mal-eng": { + "num_samples": 997, + "number_of_characters": 267295, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 142.52858575727183, + "max_sentence1_length": 360, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mal": { + "num_samples": 997, + "number_of_characters": 
267296, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 142.5295887662989, + "max_sentence2_length": 360, + "unique_sentence2": 997 + }, + "mar-eng": { + "num_samples": 997, + "number_of_characters": 251107, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 126.29187562688064, + "max_sentence1_length": 321, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mar": { + "num_samples": 997, + "number_of_characters": 250897, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 126.08124373119358, + "max_sentence2_length": 321, + "unique_sentence2": 997 + }, + "tam-eng": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 146.567703109328, + "max_sentence1_length": 358, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-tam": { + "num_samples": 997, + "number_of_characters": 271322, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 146.567703109328, + "max_sentence2_length": 358, + "unique_sentence2": 997 + }, + "tel-eng": { + "num_samples": 997, + "number_of_characters": 252385, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 127.57372116349048, + 
"max_sentence1_length": 317, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-tel": { + "num_samples": 997, + "number_of_characters": 252380, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 127.56870611835507, + "max_sentence2_length": 317, + "unique_sentence2": 997 + }, + "urd-eng": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, + "min_sentence1_length": 37, + "average_sentence1_length": 125.00501504513541, + "max_sentence1_length": 295, + "unique_sentence1": 996, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-urd": { + "num_samples": 997, + "number_of_characters": 249824, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 37, + "average_sentence2_length": 125.00501504513541, + "max_sentence2_length": 295, + "unique_sentence2": 996 + }, + "asm-eng": { + "num_samples": 997, + "number_of_characters": 246220, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 121.3901705115346, + "max_sentence1_length": 314, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-asm": { + "num_samples": 997, + "number_of_characters": 246224, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 
121.39418254764293, + "max_sentence2_length": 314, + "unique_sentence2": 997 + }, + "bho-eng": { + "num_samples": 997, + "number_of_characters": 246895, + "unique_pairs": 997, + "min_sentence1_length": 25, + "average_sentence1_length": 122.06720160481444, + "max_sentence1_length": 326, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bho": { + "num_samples": 997, + "number_of_characters": 246919, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 25, + "average_sentence2_length": 122.0912738214644, + "max_sentence2_length": 326, + "unique_sentence2": 997 + }, + "nep-eng": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, + "min_sentence1_length": 24, + "average_sentence1_length": 121.15346038114343, + "max_sentence1_length": 307, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-nep": { + "num_samples": 997, + "number_of_characters": 245984, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 24, + "average_sentence2_length": 121.15346038114343, + "max_sentence2_length": 307, + "unique_sentence2": 997 + }, + "ory-eng": { + "num_samples": 997, + "number_of_characters": 254206, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 129.4002006018054, + "max_sentence1_length": 308, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-ory": { + "num_samples": 997, + 
"number_of_characters": 254206, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 129.4002006018054, + "max_sentence2_length": 308, + "unique_sentence2": 997 + }, + "pan-eng": { + "num_samples": 997, + "number_of_characters": 251598, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 126.78435305917753, + "max_sentence1_length": 309, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-pan": { + "num_samples": 997, + "number_of_characters": 251597, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 126.78335005015045, + "max_sentence2_length": 309, + "unique_sentence2": 997 + }, + "pus-eng": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, + "min_sentence1_length": 32, + "average_sentence1_length": 122.62387161484453, + "max_sentence1_length": 300, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-pus": { + "num_samples": 997, + "number_of_characters": 247450, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 32, + "average_sentence2_length": 122.62387161484453, + "max_sentence2_length": 300, + "unique_sentence2": 997 + }, + "san-eng": { + "num_samples": 997, + "number_of_characters": 249042, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 
124.22066198595788, + "max_sentence1_length": 311, + "unique_sentence1": 994, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-san": { + "num_samples": 997, + "number_of_characters": 248877, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 124.05516549648947, + "max_sentence2_length": 311, + "unique_sentence2": 994 + }, + "awa-eng": { + "num_samples": 997, + "number_of_characters": 247944, + "unique_pairs": 997, + "min_sentence1_length": 34, + "average_sentence1_length": 123.11935807422267, + "max_sentence1_length": 329, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-awa": { + "num_samples": 997, + "number_of_characters": 247884, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 34, + "average_sentence2_length": 123.05917753259779, + "max_sentence2_length": 329, + "unique_sentence2": 997 + }, + "bgc-eng": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, + "min_sentence1_length": 27, + "average_sentence1_length": 121.10431293881645, + "max_sentence1_length": 303, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bgc": { + "num_samples": 997, + "number_of_characters": 245935, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 27, + 
"average_sentence2_length": 121.10431293881645, + "max_sentence2_length": 303, + "unique_sentence2": 997 + }, + "bod-eng": { + "num_samples": 997, + "number_of_characters": 266515, + "unique_pairs": 997, + "min_sentence1_length": 26, + "average_sentence1_length": 141.74623871614844, + "max_sentence1_length": 355, + "unique_sentence1": 996, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-bod": { + "num_samples": 997, + "number_of_characters": 266495, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 26, + "average_sentence2_length": 141.72617853560683, + "max_sentence2_length": 355, + "unique_sentence2": 996 + }, + "boy-eng": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 135.38615847542627, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-boy": { + "num_samples": 997, + "number_of_characters": 260174, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 135.38615847542627, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "gbm-eng": { + "num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, + "min_sentence1_length": 30, + "average_sentence1_length": 122.18154463390171, + "max_sentence1_length": 344, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-gbm": { + 
"num_samples": 997, + "number_of_characters": 247009, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 30, + "average_sentence2_length": 122.18154463390171, + "max_sentence2_length": 344, + "unique_sentence2": 997 + }, + "gom-eng": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 119.71815446339016, + "max_sentence1_length": 306, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-gom": { + "num_samples": 997, + "number_of_characters": 244553, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 119.71815446339016, + "max_sentence2_length": 306, + "unique_sentence2": 997 + }, + "hne-eng": { + "num_samples": 997, + "number_of_characters": 246416, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 121.58676028084253, + "max_sentence1_length": 321, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-hne": { + "num_samples": 997, + "number_of_characters": 246405, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 121.57572718154464, + "max_sentence2_length": 321, + "unique_sentence2": 997 + }, + "raj-eng": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, + "min_sentence1_length": 32, + 
"average_sentence1_length": 124.72116349047141, + "max_sentence1_length": 313, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-raj": { + "num_samples": 997, + "number_of_characters": 249541, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 32, + "average_sentence2_length": 124.72116349047141, + "max_sentence2_length": 313, + "unique_sentence2": 997 + }, + "mai-eng": { + "num_samples": 997, + "number_of_characters": 247991, + "unique_pairs": 997, + "min_sentence1_length": 29, + "average_sentence1_length": 123.16649949849548, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mai": { + "num_samples": 997, + "number_of_characters": 247994, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 29, + "average_sentence2_length": 123.16950852557673, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "mni-eng": { + "num_samples": 997, + "number_of_characters": 254308, + "unique_pairs": 997, + "min_sentence1_length": 39, + "average_sentence1_length": 129.5025075225677, + "max_sentence1_length": 310, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mni": { + "num_samples": 997, + "number_of_characters": 254312, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + 
"min_sentence2_length": 39, + "average_sentence2_length": 129.50651955867602, + "max_sentence2_length": 310, + "unique_sentence2": 997 + }, + "mup-eng": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 123.6629889669007, + "max_sentence1_length": 312, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mup": { + "num_samples": 997, + "number_of_characters": 248486, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 123.6629889669007, + "max_sentence2_length": 312, + "unique_sentence2": 997 + }, + "mwr-eng": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, + "min_sentence1_length": 31, + "average_sentence1_length": 123.81845536609829, + "max_sentence1_length": 324, + "unique_sentence1": 997, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 997 + }, + "eng-mwr": { + "num_samples": 997, + "number_of_characters": 248641, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 31, + "average_sentence2_length": 123.81845536609829, + "max_sentence2_length": 324, + "unique_sentence2": 997 + }, + "sat-eng": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, + "min_sentence1_length": 37, + "average_sentence1_length": 133.4854563691073, + "max_sentence1_length": 333, + "unique_sentence1": 995, + "min_sentence2_length": 28, + "average_sentence2_length": 125.57071213640923, + "max_sentence2_length": 297, + "unique_sentence2": 
997 + }, + "eng-sat": { + "num_samples": 997, + "number_of_characters": 258279, + "unique_pairs": 997, + "min_sentence1_length": 28, + "average_sentence1_length": 125.57071213640923, + "max_sentence1_length": 297, + "unique_sentence1": 997, + "min_sentence2_length": 37, + "average_sentence2_length": 133.4854563691073, + "max_sentence2_length": 333, + "unique_sentence2": 995 + } + } + }, + "test": { + "num_samples": 58696, + "number_of_characters": 15359416, + "unique_pairs": 58690, + "min_sentence1_length": 33, + "average_sentence1_length": 130.84266389532507, + "max_sentence1_length": 431, + "unique_sentence1": 30351, + "min_sentence2_length": 33, + "average_sentence2_length": 130.834724683113, + "max_sentence2_length": 431, + "unique_sentence2": 30351, + "hf_subset_descriptive_stats": { + "ben-eng": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 127.51185770750988, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-ben": { + "num_samples": 1012, + "number_of_characters": 261008, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.51185770750988, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "guj-eng": { + "num_samples": 1012, + "number_of_characters": 258394, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 124.92885375494072, + "max_sentence1_length": 349, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-guj": { + "num_samples": 1012, + "number_of_characters": 258394, 
+ "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 124.92885375494072, + "max_sentence2_length": 349, + "unique_sentence2": 1012 + }, + "hin-eng": { + "num_samples": 1012, + "number_of_characters": 263040, + "unique_pairs": 1012, + "min_sentence1_length": 41, + "average_sentence1_length": 129.5197628458498, + "max_sentence1_length": 381, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-hin": { + "num_samples": 1012, + "number_of_characters": 263029, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 41, + "average_sentence2_length": 129.5088932806324, + "max_sentence2_length": 381, + "unique_sentence2": 1012 + }, + "kan-eng": { + "num_samples": 1012, + "number_of_characters": 270091, + "unique_pairs": 1012, + "min_sentence1_length": 43, + "average_sentence1_length": 136.48715415019763, + "max_sentence1_length": 388, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-kan": { + "num_samples": 1012, + "number_of_characters": 270021, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 136.4179841897233, + "max_sentence2_length": 388, + "unique_sentence2": 1012 + }, + "mal-eng": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 147.56521739130434, + 
"max_sentence1_length": 376, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mal": { + "num_samples": 1012, + "number_of_characters": 281302, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 147.56521739130434, + "max_sentence2_length": 376, + "unique_sentence2": 1012 + }, + "mar-eng": { + "num_samples": 1012, + "number_of_characters": 265212, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 131.66600790513834, + "max_sentence1_length": 356, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mar": { + "num_samples": 1012, + "number_of_characters": 265023, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 131.47924901185772, + "max_sentence2_length": 355, + "unique_sentence2": 1012 + }, + "tam-eng": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 48, + "average_sentence1_length": 152.30533596837944, + "max_sentence1_length": 404, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-tam": { + "num_samples": 1012, + "number_of_characters": 286099, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 48, + "average_sentence2_length": 
152.30533596837944, + "max_sentence2_length": 404, + "unique_sentence2": 1012 + }, + "tel-eng": { + "num_samples": 1012, + "number_of_characters": 264460, + "unique_pairs": 1012, + "min_sentence1_length": 39, + "average_sentence1_length": 130.92292490118578, + "max_sentence1_length": 359, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-tel": { + "num_samples": 1012, + "number_of_characters": 264447, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 39, + "average_sentence2_length": 130.9100790513834, + "max_sentence2_length": 359, + "unique_sentence2": 1012 + }, + "urd-eng": { + "num_samples": 1012, + "number_of_characters": 261886, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 128.37944664031622, + "max_sentence1_length": 348, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-urd": { + "num_samples": 1012, + "number_of_characters": 261885, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 128.37845849802372, + "max_sentence2_length": 348, + "unique_sentence2": 1012 + }, + "asm-eng": { + "num_samples": 1012, + "number_of_characters": 257902, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 124.44268774703558, + "max_sentence1_length": 329, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-asm": { + "num_samples": 1012, 
+ "number_of_characters": 257909, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 124.449604743083, + "max_sentence2_length": 329, + "unique_sentence2": 1012 + }, + "bho-eng": { + "num_samples": 1012, + "number_of_characters": 260578, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.08695652173913, + "max_sentence1_length": 367, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-bho": { + "num_samples": 1012, + "number_of_characters": 260601, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.1096837944664, + "max_sentence2_length": 367, + "unique_sentence2": 1012 + }, + "nep-eng": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 125.39822134387352, + "max_sentence1_length": 362, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-nep": { + "num_samples": 1012, + "number_of_characters": 258869, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 125.39822134387352, + "max_sentence2_length": 362, + "unique_sentence2": 1012 + }, + "ory-eng": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 
133.2401185770751, + "max_sentence1_length": 354, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-ory": { + "num_samples": 1012, + "number_of_characters": 266805, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 133.2401185770751, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "pan-eng": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 131.84288537549406, + "max_sentence1_length": 380, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-pan": { + "num_samples": 1012, + "number_of_characters": 265391, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 131.84288537549406, + "max_sentence2_length": 380, + "unique_sentence2": 1012 + }, + "pus-eng": { + "num_samples": 1012, + "number_of_characters": 254422, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 121.00395256916995, + "max_sentence1_length": 325, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-pus": { + "num_samples": 1012, + "number_of_characters": 254421, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + 
"average_sentence2_length": 121.00296442687747, + "max_sentence2_length": 325, + "unique_sentence2": 1012 + }, + "san-eng": { + "num_samples": 1012, + "number_of_characters": 260339, + "unique_pairs": 1012, + "min_sentence1_length": 33, + "average_sentence1_length": 126.85079051383399, + "max_sentence1_length": 358, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-san": { + "num_samples": 1012, + "number_of_characters": 260224, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 33, + "average_sentence2_length": 126.73715415019763, + "max_sentence2_length": 358, + "unique_sentence2": 1011 + }, + "awa-eng": { + "num_samples": 1012, + "number_of_characters": 260179, + "unique_pairs": 1012, + "min_sentence1_length": 34, + "average_sentence1_length": 126.69268774703558, + "max_sentence1_length": 378, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-awa": { + "num_samples": 1012, + "number_of_characters": 260137, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 34, + "average_sentence2_length": 126.65118577075098, + "max_sentence2_length": 378, + "unique_sentence2": 1012 + }, + "bgc-eng": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 123.99604743083005, + "max_sentence1_length": 332, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + 
"eng-bgc": { + "num_samples": 1012, + "number_of_characters": 257450, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 123.99604743083005, + "max_sentence2_length": 332, + "unique_sentence2": 1012 + }, + "bod-eng": { + "num_samples": 1012, + "number_of_characters": 280188, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 146.46442687747034, + "max_sentence1_length": 431, + "unique_sentence1": 1009, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-bod": { + "num_samples": 1012, + "number_of_characters": 280126, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 42, + "average_sentence2_length": 146.40316205533597, + "max_sentence2_length": 431, + "unique_sentence2": 1009 + }, + "boy-eng": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 143.84584980237153, + "max_sentence1_length": 396, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-boy": { + "num_samples": 1012, + "number_of_characters": 277538, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 143.84584980237153, + "max_sentence2_length": 396, + "unique_sentence2": 1011 + }, + "gbm-eng": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, + "min_sentence1_length": 
38, + "average_sentence1_length": 127.53063241106719, + "max_sentence1_length": 333, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-gbm": { + "num_samples": 1012, + "number_of_characters": 261027, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 127.53063241106719, + "max_sentence2_length": 333, + "unique_sentence2": 1012 + }, + "gom-eng": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, + "min_sentence1_length": 37, + "average_sentence1_length": 125.70750988142292, + "max_sentence1_length": 335, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-gom": { + "num_samples": 1012, + "number_of_characters": 259182, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 37, + "average_sentence2_length": 125.70750988142292, + "max_sentence2_length": 335, + "unique_sentence2": 1012 + }, + "hne-eng": { + "num_samples": 1012, + "number_of_characters": 258911, + "unique_pairs": 1012, + "min_sentence1_length": 42, + "average_sentence1_length": 125.43972332015811, + "max_sentence1_length": 327, + "unique_sentence1": 1011, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-hne": { + "num_samples": 1012, + "number_of_characters": 258915, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + 
"min_sentence2_length": 42, + "average_sentence2_length": 125.44367588932806, + "max_sentence2_length": 326, + "unique_sentence2": 1011 + }, + "raj-eng": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 128.47924901185772, + "max_sentence1_length": 338, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-raj": { + "num_samples": 1012, + "number_of_characters": 261987, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 128.47924901185772, + "max_sentence2_length": 338, + "unique_sentence2": 1012 + }, + "mai-eng": { + "num_samples": 1012, + "number_of_characters": 261374, + "unique_pairs": 1012, + "min_sentence1_length": 36, + "average_sentence1_length": 127.87351778656127, + "max_sentence1_length": 350, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mai": { + "num_samples": 1012, + "number_of_characters": 261377, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 36, + "average_sentence2_length": 127.87648221343873, + "max_sentence2_length": 350, + "unique_sentence2": 1012 + }, + "mni-eng": { + "num_samples": 1012, + "number_of_characters": 268767, + "unique_pairs": 1012, + "min_sentence1_length": 38, + "average_sentence1_length": 135.17885375494072, + "max_sentence1_length": 353, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + 
"unique_sentence2": 1012 + }, + "eng-mni": { + "num_samples": 1012, + "number_of_characters": 268768, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 38, + "average_sentence2_length": 135.1798418972332, + "max_sentence2_length": 354, + "unique_sentence2": 1012 + }, + "mup-eng": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, + "min_sentence1_length": 40, + "average_sentence1_length": 128.52569169960475, + "max_sentence1_length": 340, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mup": { + "num_samples": 1012, + "number_of_characters": 262034, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 40, + "average_sentence2_length": 128.52569169960475, + "max_sentence2_length": 340, + "unique_sentence2": 1012 + }, + "mwr-eng": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.22035573122528, + "max_sentence1_length": 345, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-mwr": { + "num_samples": 1012, + "number_of_characters": 263749, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.22035573122528, + "max_sentence2_length": 345, + "unique_sentence2": 1012 + }, + "sat-eng": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 
1012, + "min_sentence1_length": 43, + "average_sentence1_length": 138.13339920948616, + "max_sentence1_length": 366, + "unique_sentence1": 1012, + "min_sentence2_length": 35, + "average_sentence2_length": 130.401185770751, + "max_sentence2_length": 368, + "unique_sentence2": 1012 + }, + "eng-sat": { + "num_samples": 1012, + "number_of_characters": 271757, + "unique_pairs": 1012, + "min_sentence1_length": 35, + "average_sentence1_length": 130.401185770751, + "max_sentence1_length": 368, + "unique_sentence1": 1012, + "min_sentence2_length": 43, + "average_sentence2_length": 138.13339920948616, + "max_sentence2_length": 366, + "unique_sentence2": 1012 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json new file mode 100644 index 0000000000..3adf27b3df --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NTREXBitextMining.json @@ -0,0 +1,24925 @@ +{ + "test": { + "num_samples": 3826252, + "number_of_characters": 988355274, + "unique_pairs": 3820263, + "min_sentence1_length": 1, + "average_sentence1_length": 129.15449296073547, + "max_sentence1_length": 773, + "unique_sentence1": 241259, + "min_sentence2_length": 1, + "average_sentence2_length": 129.15449296073547, + "max_sentence2_length": 773, + "unique_sentence2": 241259, + "hf_subset_descriptive_stats": { + "afr_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "afr_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 
437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "afr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "afr_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "afr_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "afr_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "afr_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + 
"average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "afr_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "afr_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "afr_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.37756634952427, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "amh_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "amh_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 
483, + "unique_sentence2": 1997 + }, + "amh_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "amh_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "amh_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "amh_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "amh_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "amh_Ethi-swa_Latn": { + 
"num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "amh_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "amh_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "amh_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "amh_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "amh_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 
1996, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "amh_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 83.87931897846771, + "max_sentence1_length": 290, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "arb_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "arb_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "arb_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "arb_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 
115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "arb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "arb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "arb_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "arb_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "arb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + 
"unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "arb_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "arb_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "arb_Arab-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "arb_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "arb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + 
"average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "arb_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "arb_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "arb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "arb_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "arb_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 
468, + "unique_sentence2": 1996 + }, + "arb_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "arb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "arb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "arb_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "arb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "arb_Arab-spa_Latn": { + 
"num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "arb_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "arb_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "arb_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "arb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "arb_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 496794, + 
"unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "arb_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "arb_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "arb_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 115.76414621932899, + "max_sentence1_length": 362, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "aze_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "aze_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "aze_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "aze_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "aze_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "aze_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "aze_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 
398, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "aze_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "aze_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 135.0195292939409, + "max_sentence1_length": 398, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "bak_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515960, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "bak_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bak_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + 
"average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "bak_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "bak_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511939, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "bak_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + 
"max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "bak_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 123.34802203304957, + "max_sentence1_length": 437, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "bel_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "bel_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "bel_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bel_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"bel_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bel_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "bel_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bel_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bel_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bel_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 
508986, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bel_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bel_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bel_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 128.2373560340511, + "max_sentence1_length": 422, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "bem_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bem_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "bem_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "bem_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "bem_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "bem_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "bem_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + 
"max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "bem_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 149.47020530796195, + "max_sentence1_length": 465, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "ben_Beng-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474983, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ben_Beng-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ben_Beng-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "ben_Beng-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "ben_Beng-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ben_Beng-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "ben_Beng-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ben_Beng-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ben_Beng-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ben_Beng-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "ben_Beng-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ben_Beng-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ben_Beng-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ben_Beng-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 
1997 + }, + "ben_Beng-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ben_Beng-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "ben_Beng-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 377207, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ben_Beng-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ben_Beng-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "ben_Beng-nep_Deva": { + "num_samples": 1997, + 
"number_of_characters": 492025, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "ben_Beng-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ben_Beng-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "ben_Beng-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ben_Beng-por_Latn": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ben_Beng-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ben_Beng-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "ben_Beng-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "ben_Beng-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ben_Beng-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ben_Beng-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ben_Beng-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ben_Beng-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "ben_Beng-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ben_Beng-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "ben_Beng-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ben_Beng-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ben_Beng-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 122.08412618928392, + "max_sentence1_length": 402, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "bod_Tibt-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "bod_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bod_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + 
"average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "bod_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "bod_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "bod_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "bod_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 150.54031046569855, + "max_sentence1_length": 478, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "bos_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511000, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + 
"max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "bos_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "bos_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bos_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bos_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bos_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + 
"bos_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bos_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bos_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bos_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bos_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bos_Latn-srp_Latn": { + "num_samples": 1997, + 
"number_of_characters": 511051, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bos_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 127.64646970455684, + "max_sentence1_length": 434, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "bul_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525979, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "bul_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 524799, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "bul_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "bul_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "bul_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "bul_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "bul_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "bul_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "bul_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 
135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "bul_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "bul_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "bul_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 526030, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "bul_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 135.14722083124687, + "max_sentence1_length": 493, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "cat_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "cat_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "cat_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "cat_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "cat_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "cat_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "cat_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "cat_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 141.6925388082123, + "max_sentence1_length": 460, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ces_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497408, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "ces_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 496228, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "ces_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511207, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 
493, + "unique_sentence2": 1996 + }, + "ces_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ces_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ces_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "ces_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ces_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ces_Latn-slk_Latn": { + 
"num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "ces_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ces_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "ces_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "ces_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 120.84026039058588, + "max_sentence1_length": 474, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "ckb_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483548, + 
"unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ckb_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ckb_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ckb_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ckb_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "ckb_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "ckb_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "ckb_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "ckb_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "ckb_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 126.37305958938407, + "max_sentence1_length": 399, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "cym_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + 
"max_sentence1_length": 444, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "cym_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.4526790185278, + "max_sentence1_length": 444, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "unique_sentence2": 1997 + }, + "dan_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520490, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "dan_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "dan_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "dan_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + 
"min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "dan_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "dan_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "dan_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "dan_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "dan_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 
125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "dan_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 126.25838758137206, + "max_sentence1_length": 522, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "deu_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 564002, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "deu_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 526831, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "deu_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 539452, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "deu_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 547788, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + 
"unique_sentence2": 1995 + }, + "deu_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "deu_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "deu_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "deu_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "deu_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "deu_Latn-fra_Latn": { + 
"num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "deu_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "deu_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "deu_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "deu_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "deu_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 
1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "deu_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "deu_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "deu_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "deu_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "deu_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "deu_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "deu_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "deu_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "deu_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "deu_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "deu_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "deu_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "deu_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 547477, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "deu_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "deu_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + 
"average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "deu_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "deu_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "deu_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 148.04707060590886, + "max_sentence1_length": 508, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "div_Thaa-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 547650, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "div_Thaa-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 
437, + "unique_sentence2": 1997 + }, + "div_Thaa-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "div_Thaa-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "div_Thaa-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "div_Thaa-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "div_Thaa-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "div_Thaa-nep_Deva": { + 
"num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "div_Thaa-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "div_Thaa-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "div_Thaa-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "div_Thaa-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "div_Thaa-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 551375, + 
"unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "div_Thaa-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 152.15222834251378, + "max_sentence1_length": 609, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "dzo_Tibt-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 543850, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "dzo_Tibt-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "dzo_Tibt-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "dzo_Tibt-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, + "min_sentence1_length": 5, + 
"average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "dzo_Tibt-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "dzo_Tibt-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "dzo_Tibt-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 121.79318978467701, + "max_sentence1_length": 411, + "unique_sentence1": 1992, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "ell_Grek-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 530308, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ell_Grek-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 542929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + 
"max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "ell_Grek-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 594777, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ell_Grek-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ell_Grek-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ell_Grek-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ell_Grek-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + 
"min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ell_Grek-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ell_Grek-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ell_Grek-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ell_Grek-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "ell_Grek-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "ell_Grek-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ell_Grek-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "ell_Grek-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ell_Grek-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ell_Grek-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 
1996 + }, + "ell_Grek-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ell_Grek-por_Latn": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ell_Grek-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ell_Grek-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ell_Grek-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "ell_Grek-swa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 571636, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ell_Grek-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ell_Grek-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ell_Grek-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ell_Grek-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ell_Grek-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ell_Grek-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 149.78818227341011, + "max_sentence1_length": 584, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "eng_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516072, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "eng_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 415227, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "eng_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 478901, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "eng_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 517354, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "eng_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 494046, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "eng_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 503810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "eng_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 546212, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "eng_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491522, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "eng_Latn-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 548349, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "eng_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 502630, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "eng_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517609, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "eng_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 530680, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "eng_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 489038, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "eng_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 500087, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "eng_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 514225, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "unique_sentence2": 1997 + }, + "eng_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 499858, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "eng_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 543370, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "eng_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551568, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "eng_Latn-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 490941, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + 
"max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "eng_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 546847, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "eng_Latn-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "eng_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "eng_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "eng_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + 
"eng_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "eng_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "eng_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "eng_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "eng_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "eng_Latn-gle_Latn": { + "num_samples": 1997, + "number_of_characters": 
542529, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 147.62593890836254, + "max_sentence2_length": 461, + "unique_sentence2": 1997 + }, + "eng_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "eng_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "eng_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "eng_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "eng_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "eng_Latn-hmn_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 165.6434651977967, + "max_sentence2_length": 643, + "unique_sentence2": 1997 + }, + "eng_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "eng_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "eng_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eng_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 
437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "eng_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "eng_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "eng_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "eng_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "eng_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + 
"average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "eng_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "eng_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "eng_Latn-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "eng_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "eng_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + 
"max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "eng_Latn-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "eng_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "eng_Latn-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "eng_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "eng_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + 
"eng_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "eng_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "eng_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "eng_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "eng_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "eng_Latn-mlg_Latn": { + "num_samples": 1997, + 
"number_of_characters": 568028, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "eng_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "eng_Latn-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "eng_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "eng_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "eng_Latn-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "eng_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "eng_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eng_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "eng_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "eng_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "eng_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "eng_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "eng_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "eng_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "eng_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "eng_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "eng_Latn-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "eng_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "eng_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "eng_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "eng_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "eng_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eng_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "eng_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "eng_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + 
"max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "eng_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "eng_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "eng_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "eng_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "eng_Latn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + 
"eng_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "eng_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "eng_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "eng_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "eng_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "eng_Latn-tah_Latn": { + "num_samples": 1997, + 
"number_of_characters": 557343, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "eng_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "eng_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "eng_Latn-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eng_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "eng_Latn-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "eng_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "eng_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "eng_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "eng_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "eng_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "eng_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "eng_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "eng_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "eng_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "eng_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + 
"unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "eng_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "eng_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "eng_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "eng_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "eng_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + 
"average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "eng_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "eng_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "eng_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.04606910365548, + "max_sentence1_length": 437, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "eus_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 519005, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "eus_Latn-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 579051, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + 
"max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "eus_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522923, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "eus_Latn-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "eus_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "eus_Latn-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "eus_Latn-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 536090, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + 
"eus_Latn-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eus_Latn-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "eus_Latn-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "eus_Latn-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "eus_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "eus_Latn-tel_Telu": { + "num_samples": 1997, + 
"number_of_characters": 522730, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "eus_Latn-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 137.80821231847773, + "max_sentence1_length": 393, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "ewe_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 537470, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "ewe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 486698, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ewe_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "ewe_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "ewe_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "ewe_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "ewe_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "ewe_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 119.6685027541312, + "max_sentence1_length": 493, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "fao_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 526155, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 
129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fao_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 509941, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "fao_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553453, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fao_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505523, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fao_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "fao_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 
1997, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "fao_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fao_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "fao_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "fao_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0951427140711, + "max_sentence1_length": 433, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fas_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 474520, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 
115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fas_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 487141, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fas_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495706, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "fas_Arab-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 538989, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fas_Arab-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 542466, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fas_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 491059, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + 
"unique_sentence2": 1997 + }, + "fas_Arab-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "fas_Arab-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "fas_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fas_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fas_Arab-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fas_Arab-ind_Latn": { + "num_samples": 
1997, + "number_of_characters": 530311, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fas_Arab-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fas_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "fas_Arab-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fas_Arab-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fas_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, + 
"min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "fas_Arab-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fas_Arab-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fas_Arab-por_Latn": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fas_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "fas_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "fas_Arab-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fas_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "fas_Arab-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "fas_Arab-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 515848, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fas_Arab-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + 
"unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fas_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fas_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "fas_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fas_Arab-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fas_Arab-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + 
"average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fas_Arab-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 121.85227841762644, + "max_sentence1_length": 389, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fij_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 548225, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fij_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "fij_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fij_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 
540, + "unique_sentence2": 1996 + }, + "fij_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "fij_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "fij_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "fij_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "fij_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "fij_Latn-ton_Latn": { + 
"num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.478217325989, + "max_sentence1_length": 448, + "unique_sentence1": 1988, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "fil_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541140, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fil_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 593925, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "fil_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fil_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "fil_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + 
"unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "fil_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "fil_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "fil_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "fil_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 603043, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "fil_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 146.93039559339007, + "max_sentence1_length": 554, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "fin_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 500981, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fin_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 513602, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fin_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 565450, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fin_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 568927, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517520, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + 
"max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fin_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 513139, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "fin_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "fin_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fin_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fin_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fin_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fin_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fin_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fin_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "fin_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 
129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fin_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fin_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fin_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fin_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fin_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 
1996 + }, + "fin_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fin_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fin_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fin_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fin_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fin_Latn-zho_Hant": { + "num_samples": 1997, + 
"number_of_characters": 361278, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fin_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.10265398097147, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fra_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 524289, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "fra_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 536910, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "fra_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 576068, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "fra_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 588758, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "fra_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 592235, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "fra_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540828, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fra_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 536447, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "fra_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 562908, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "fra_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 
146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fra_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "fra_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "fra_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "fra_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "fra_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 
1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "fra_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "fra_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "fra_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "fra_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "fra_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "fra_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "fra_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "fra_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "fra_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "fra_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 
+ }, + "fra_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "fra_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "fra_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "fra_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "fra_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "fra_Latn-zho_Hant": { + "num_samples": 1997, + 
"number_of_characters": 384586, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "fra_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.7741612418628, + "max_sentence1_length": 512, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "fuc_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 526972, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "fuc_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 476200, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "fuc_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 467458, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "fuc_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "fuc_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "fuc_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "fuc_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "fuc_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 114.4116174261392, + "max_sentence1_length": 376, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "gle_Latn-cym_Latn": { + "num_samples": 1997, + "number_of_characters": 561314, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 
147.62593890836254, + "max_sentence1_length": 461, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.4526790185278, + "max_sentence2_length": 444, + "unique_sentence2": 1997 + }, + "gle_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 542529, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 147.62593890836254, + "max_sentence1_length": 461, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "glg_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 554946, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "glg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 519706, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "glg_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565094, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "glg_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "glg_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "glg_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "glg_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "glg_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 136.19729594391586, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "guj_Gujr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 488733, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + 
"average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "guj_Gujr-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 548779, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "guj_Gujr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 492651, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "guj_Gujr-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 520134, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "guj_Gujr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "guj_Gujr-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + 
"max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "guj_Gujr-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "guj_Gujr-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "guj_Gujr-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 495353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "guj_Gujr-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "guj_Gujr-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + 
"guj_Gujr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "guj_Gujr-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "guj_Gujr-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 122.64947421131697, + "max_sentence1_length": 378, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "hau_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 437473, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "hau_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 517686, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hau_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 
516067, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "hau_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "hau_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "hau_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 561258, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "hau_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "hau_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hau_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "hau_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "hau_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "hau_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "hau_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "hau_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 135.185778668002, + "max_sentence1_length": 483, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "heb_Hebr-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 431477, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "heb_Hebr-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 444098, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "heb_Hebr-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 452663, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "heb_Hebr-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 495946, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + 
"average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "heb_Hebr-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 499423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "heb_Hebr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 448016, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "heb_Hebr-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 443635, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "heb_Hebr-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 470096, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "heb_Hebr-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 493404, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + 
"max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "heb_Hebr-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "heb_Hebr-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "heb_Hebr-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "heb_Hebr-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "heb_Hebr-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + 
"heb_Hebr-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "heb_Hebr-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "heb_Hebr-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "heb_Hebr-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "heb_Hebr-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "heb_Hebr-por_Latn": { + "num_samples": 1997, + "number_of_characters": 
477511, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "heb_Hebr-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "heb_Hebr-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "heb_Hebr-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "heb_Hebr-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "heb_Hebr-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "heb_Hebr-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "heb_Hebr-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "heb_Hebr-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "heb_Hebr-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 474443, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "heb_Hebr-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + 
"max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "heb_Hebr-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "heb_Hebr-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "heb_Hebr-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 100.29844767150726, + "max_sentence1_length": 375, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hin_Deva-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 492756, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "hin_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 505377, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "hin_Deva-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557225, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "hin_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 565423, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "hin_Deva-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 560702, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hin_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509295, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hin_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536778, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "hin_Deva-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 504914, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "hin_Deva-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 531375, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hin_Deva-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 554683, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "hin_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 506506, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "hin_Deva-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 461871, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 
1996 + }, + "hin_Deva-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "hin_Deva-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "hin_Deva-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "hin_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "hin_Deva-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "hin_Deva-lit_Latn": { + "num_samples": 1997, + 
"number_of_characters": 520972, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "hin_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "hin_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "hin_Deva-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "hin_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "hin_Deva-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, + 
"min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hin_Deva-por_Latn": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "hin_Deva-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hin_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "hin_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "hin_Deva-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, 
+ "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "hin_Deva-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hin_Deva-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "hin_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "hin_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "hin_Deva-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "hin_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "hin_Deva-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "hin_Deva-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "hin_Deva-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 130.9839759639459, + "max_sentence1_length": 394, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hmn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 578510, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 165.6434651977967, + "max_sentence1_length": 643, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + 
"max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hrv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512015, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "hrv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 510835, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "hrv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525814, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "hrv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497243, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "hrv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503645, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"hrv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "hrv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hrv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hrv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "hrv_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hrv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 
508228, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "hrv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "hrv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 128.1547320981472, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "hun_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509557, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "hun_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 522178, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "hun_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 574026, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "hun_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 577503, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hun_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 526096, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hun_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521715, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "hun_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 548176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "hun_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 571484, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 
508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "hun_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478672, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "hun_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539951, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "hun_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "hun_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "hun_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "hun_Latn-lav_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "hun_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "hun_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "hun_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "hun_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, 
+ "unique_sentence2": 1996 + }, + "hun_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "hun_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "hun_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "hun_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "hun_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "hun_Latn-tur_Latn": { + "num_samples": 
1997, + "number_of_characters": 543989, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "hun_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "hun_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "hun_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 139.3970956434652, + "max_sentence1_length": 508, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "hye_Armn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 563842, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "hye_Armn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 512435, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "hye_Armn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "hye_Armn-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 132.55633450175262, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "ibo_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 413608, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "ibo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493821, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ibo_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 516067, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "ibo_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "ibo_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "ibo_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "ibo_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "ibo_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 
1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ibo_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "ibo_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "ibo_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "ibo_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "ibo_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 
158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ibo_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 123.23535302954431, + "max_sentence1_length": 469, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ind_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 518153, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "ind_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 530774, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "ind_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 582622, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ind_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 586099, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + 
"unique_sentence2": 1996 + }, + "ind_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 534692, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ind_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 530311, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "ind_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 587477, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "ind_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 580392, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "ind_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 556772, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ind_Latn-fra_Latn": { + 
"num_samples": 1997, + "number_of_characters": 580080, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ind_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 487268, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "ind_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 548547, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "ind_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 565348, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ind_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "ind_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 
1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "ind_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "ind_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "ind_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "ind_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "ind_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "ind_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ind_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ind_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ind_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ind_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 564827, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + 
"unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "ind_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "ind_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ind_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ind_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "ind_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + 
"average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "ind_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "ind_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "ind_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ind_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "ind_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 143.70155232849274, + "max_sentence1_length": 486, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 
494, + "unique_sentence2": 1996 + }, + "isl_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 530560, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "isl_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 514346, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "isl_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 557858, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "isl_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 509928, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "isl_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 520011, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "isl_Latn-ltz_Latn": { + 
"num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "isl_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "isl_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "isl_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "isl_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 131.30095142714072, + "max_sentence1_length": 399, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "ita_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 572177, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "ita_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536937, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ita_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 582325, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ita_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 561203, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ita_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ita_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ita_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "ita_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 144.82573860791186, + "max_sentence1_length": 623, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "jpn_Jpan-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 342807, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "jpn_Jpan-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 355428, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "jpn_Jpan-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 407276, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 
189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "jpn_Jpan-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 410753, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "jpn_Jpan-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 359346, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "jpn_Jpan-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 354965, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "jpn_Jpan-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 381426, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "jpn_Jpan-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 404734, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + 
"average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "jpn_Jpan-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 311922, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "jpn_Jpan-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 373201, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "jpn_Jpan-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 390002, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "jpn_Jpan-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 398598, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "jpn_Jpan-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, 
+ "unique_sentence2": 1995 + }, + "jpn_Jpan-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "jpn_Jpan-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "jpn_Jpan-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "jpn_Jpan-por_Latn": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "jpn_Jpan-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "jpn_Jpan-spa_Latn": { + "num_samples": 
1997, + "number_of_characters": 399826, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "jpn_Jpan-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "jpn_Jpan-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "jpn_Jpan-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "jpn_Jpan-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "jpn_Jpan-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, + 
"min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "jpn_Jpan-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "jpn_Jpan-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "jpn_Jpan-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "jpn_Jpan-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 55.89684526790185, + "max_sentence1_length": 189, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "kan_Knda-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509338, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + 
"max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "kan_Knda-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 569384, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "kan_Knda-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513256, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kan_Knda-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 540739, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "kan_Knda-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 510467, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "kan_Knda-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527111, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + 
"min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "kan_Knda-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "kan_Knda-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "kan_Knda-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "kan_Knda-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "kan_Knda-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "kan_Knda-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "kan_Knda-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "kan_Knda-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 132.96745117676514, + "max_sentence1_length": 449, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "kat_Geor-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 565719, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "kat_Geor-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514312, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + 
"unique_sentence2": 1997 + }, + "kat_Geor-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 531307, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "kat_Geor-sqi_Latn": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 133.49624436654983, + "max_sentence1_length": 503, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 142.01652478718077, + "max_sentence2_length": 461, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 529910, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506602, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "kaz_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507996, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-kir_Cyrl": { + 
"num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kaz_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "kaz_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 553971, + 
"unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 130.33350025037555, + "max_sentence1_length": 473, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "khm_Khmr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 589120, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "khm_Khmr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 531712, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "khm_Khmr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 536211, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "khm_Khmr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "khm_Khmr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, + "min_sentence1_length": 6, + 
"average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "khm_Khmr-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "khm_Khmr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 144.4621932899349, + "max_sentence1_length": 517, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "kin_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 602279, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "kin_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551507, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kin_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 542765, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + 
"max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "kin_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 532267, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "kin_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "kin_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "kin_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "kin_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 152.12168252378567, + "max_sentence1_length": 541, + "unique_sentence1": 1996, + 
"min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "kir_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 520498, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "kir_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 497190, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "kir_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498584, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kir_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511140, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "kir_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kir_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 561886, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "kir_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 544559, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.62043064596895, + "max_sentence1_length": 395, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "kmr_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 477127, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + 
"unique_sentence2": 1995 + }, + "kmr_Latn-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 498313, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "kmr_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493666, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kmr_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 489285, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "kmr_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 446242, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "kmr_Latn-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "kmr_Latn-prs_Arab": { + 
"num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "kmr_Latn-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "kmr_Latn-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "kmr_Latn-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.15773660490736, + "max_sentence1_length": 420, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "kor_Hang-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 364586, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "kor_Hang-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 377207, + 
"unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "kor_Hang-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 429055, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "kor_Hang-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 432532, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "kor_Hang-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 381125, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "kor_Hang-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 376744, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "kor_Hang-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 403205, + "unique_pairs": 1996, + "min_sentence1_length": 5, + 
"average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "kor_Hang-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 426513, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "kor_Hang-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 333701, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "kor_Hang-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 394980, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "kor_Hang-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 411781, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "kor_Hang-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 420377, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, 
+ "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "kor_Hang-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 245031, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "kor_Hang-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "kor_Hang-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "kor_Hang-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "kor_Hang-por_Latn": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + 
"average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "kor_Hang-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "kor_Hang-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "kor_Hang-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "kor_Hang-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "kor_Hang-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 
581, + "unique_sentence2": 1997 + }, + "kor_Hang-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "kor_Hang-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "kor_Hang-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "kor_Hang-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "kor_Hang-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "kor_Hang-zul_Latn": { + "num_samples": 
1997, + "number_of_characters": 391137, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 66.80270405608412, + "max_sentence1_length": 217, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "lao_Laoo-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 567609, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "lao_Laoo-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 510201, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "lao_Laoo-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 514700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lao_Laoo-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 555471, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "lao_Laoo-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, + 
"min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "lao_Laoo-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "lao_Laoo-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 133.69053580370556, + "max_sentence1_length": 507, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "lav_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515908, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lav_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 537988, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "lav_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 546564, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "lav_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, + "min_sentence1_length": 7, + "average_sentence1_length": 134.29544316474713, + "max_sentence1_length": 503, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "lit_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 490578, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "lit_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 503199, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "lit_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 555047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "lit_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 558524, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "lit_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 507117, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "lit_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 502736, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "lit_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 529197, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "lit_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 552505, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "lit_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 459693, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + 
"max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "lit_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520972, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "lit_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 537773, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "lit_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 546369, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "lit_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 371023, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "lit_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 392802, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "lit_Latn-lav_Latn": 
{ + "num_samples": 1997, + "number_of_characters": 527585, + "unique_pairs": 1995, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 134.29544316474713, + "max_sentence2_length": 503, + "unique_sentence2": 1994 + }, + "lit_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "lit_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "lit_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "lit_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "lit_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 
1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "lit_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "lit_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "lit_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "lit_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "lit_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 
129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "lit_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "lit_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 129.893340010015, + "max_sentence1_length": 446, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ltz_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 549109, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "ltz_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 532895, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "ltz_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 576407, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "ltz_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 528477, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ltz_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 538560, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "ltz_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 542965, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "ltz_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "ltz_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "ltz_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "ltz_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 140.58938407611416, + "max_sentence1_length": 543, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "mal_Mlym-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 551872, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mal_Mlym-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 604657, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mal_Mlym-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 597572, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + 
"unique_sentence2": 1997 + }, + "mal_Mlym-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 591124, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mal_Mlym-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "mal_Mlym-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "mal_Mlym-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mal_Mlym-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mal_Mlym-tah_Latn": { + 
"num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mal_Mlym-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 617792, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 152.30445668502753, + "max_sentence1_length": 540, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mar_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 504689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "mar_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 564735, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "mar_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 508607, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mar_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 536090, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "mar_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 505818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "mar_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 522462, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "mar_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 526423, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "mar_Deva-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "mar_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, + "min_sentence1_length": 7, + 
"average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "mar_Deva-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 519628, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "mar_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "mar_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "mar_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + "max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "mar_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 130.63945918878318, + 
"max_sentence1_length": 443, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "mey_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 445016, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "mey_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 466202, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "mey_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 461555, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mey_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 457174, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "mey_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 414131, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + 
"min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "mey_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 459781, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "mey_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "mey_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "mey_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "mey_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 107.07811717576365, + "max_sentence1_length": 392, + "unique_sentence1": 1993, + "min_sentence2_length": 3, + "average_sentence2_length": 
137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "mkd_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 523981, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 522801, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537780, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 509209, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "mkd_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515611, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 
1997 + }, + "mkd_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 523816, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "mkd_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-srp_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 520194, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "mkd_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "mkd_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.1467200801202, + "max_sentence1_length": 451, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "mlg_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 568028, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mlg_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 620813, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mlg_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 613728, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "mlg_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 607280, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mlg_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 624460, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "mlg_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "mlg_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mlg_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mlg_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mlg_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 160.39459188783175, + "max_sentence1_length": 559, + "unique_sentence1": 1994, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mlt_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560435, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "mlt_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525195, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mlt_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570583, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "mlt_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549461, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "mlt_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566692, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "mlt_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "mlt_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569960, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "mlt_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 138.94591887831749, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + 
"average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "mon_Mong-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 559677, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "mon_Mong-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 502269, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "mon_Mong-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506768, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mon_Mong-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 547539, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "mon_Mong-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 526028, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + 
"max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "mon_Mong-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "mon_Mong-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 129.7185778668002, + "max_sentence1_length": 414, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "mri_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521844, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mri_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 574629, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "mri_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 567544, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + 
"mri_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561096, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "mri_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 578276, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "mri_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 594432, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "mri_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "mri_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "mri_Latn-tah_Latn": { + "num_samples": 1997, + 
"number_of_characters": 583747, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "mri_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 137.26790185277918, + "max_sentence1_length": 443, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "msa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524903, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "msa_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 577688, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "msa_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 570603, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "msa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564155, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "msa_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 581335, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "msa_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 597491, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "msa_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551307, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "msa_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "msa_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "msa_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 138.79969954932398, + "max_sentence1_length": 463, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "mya_Mymr-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 612483, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "mya_Mymr-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 555075, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "mya_Mymr-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 559574, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "mya_Mymr-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 600345, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + 
"unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "mya_Mymr-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 578834, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "mya_Mymr-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 570902, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "mya_Mymr-tha_Thai": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 156.16124186279418, + "max_sentence1_length": 773, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 118.91236855282925, + "max_sentence2_length": 439, + "unique_sentence2": 1996 + }, + "nde_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596231, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "nde_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 545459, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nde_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 536717, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "nde_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526219, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "nde_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 601526, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "nde_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "nde_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + 
"max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "nde_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.09313970956435, + "max_sentence1_length": 590, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "nep_Deva-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 492025, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "nep_Deva-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 552071, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "nep_Deva-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495943, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nep_Deva-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523426, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + 
"nep_Deva-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 493154, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "nep_Deva-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509798, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "nep_Deva-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513759, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "nep_Deva-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "nep_Deva-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "nep_Deva-sin_Sinh": { + "num_samples": 1997, + 
"number_of_characters": 506964, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "nep_Deva-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "nep_Deva-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "nep_Deva-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "nep_Deva-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 124.29794692038057, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "nld_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 560267, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nld_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 523096, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "nld_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 535717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "nld_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 544053, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nld_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 587565, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nld_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 591042, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 
146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "nld_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539635, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nld_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 549718, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nld_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 535254, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "nld_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 561715, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "nld_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585023, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 
1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "nld_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 492211, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "nld_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 553490, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "nld_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 570291, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "nld_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 578887, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "nld_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 554123, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nld_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 403541, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "nld_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 425320, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "nld_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 551312, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "nld_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 572672, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nld_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 
+ }, + "nld_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "nld_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "nld_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "nld_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "nld_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580115, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "nld_Latn-swa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 564424, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "nld_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nld_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "nld_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "nld_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nld_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "nld_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 146.1767651477216, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "nno_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 516709, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nno_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 500495, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nno_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 544007, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nno_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 496077, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nno_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 506160, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nno_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 510565, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nno_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 529114, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nno_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 540272, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "nno_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "nno_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.36504757135704, + "max_sentence1_length": 417, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nob_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 519796, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "nob_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503582, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "nob_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 547094, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "nob_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499164, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nob_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509247, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "nob_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 513652, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "nob_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532201, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "nob_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543359, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "nob_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 499801, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + 
"max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "nob_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 125.91086629944917, + "max_sentence1_length": 482, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "nso_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 459006, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "nso_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539219, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nso_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 561465, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "nso_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537600, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + 
"nso_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "nso_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "nso_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "nso_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "nso_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "nso_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 
625782, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "nso_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "nso_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "nso_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "nso_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 145.96845267901853, + "max_sentence1_length": 487, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "nya_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 582774, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "nya_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "nya_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 523260, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "nya_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 512762, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "nya_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 588069, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "nya_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 582021, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + 
"max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "nya_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "nya_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 142.35453179769655, + "max_sentence1_length": 464, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "orm_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 404938, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "orm_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485151, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "orm_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 507397, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + 
"min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "orm_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 483532, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "orm_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 528930, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "orm_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "orm_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "orm_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 10, + "average_sentence2_length": 
136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "orm_Ethi-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "orm_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "orm_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "orm_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "orm_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 
1996 + }, + "orm_Ethi-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 118.89384076114172, + "max_sentence1_length": 466, + "unique_sentence1": 1984, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "pan_Guru-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 494224, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "pan_Guru-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 554270, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "pan_Guru-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 498142, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pan_Guru-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 525625, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "pan_Guru-guj_Gujr": { + "num_samples": 1997, + 
"number_of_characters": 495353, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "pan_Guru-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 511997, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "pan_Guru-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 515958, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "pan_Guru-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 511309, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "pan_Guru-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 498645, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "pan_Guru-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "pan_Guru-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "pan_Guru-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "pan_Guru-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "pan_Guru-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 125.39909864797195, + "max_sentence1_length": 383, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "pol_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 509047, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 
139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "pol_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 533956, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "pol_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521668, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "pol_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 532776, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "pol_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547755, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "pol_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 519184, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + 
"unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "pol_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 573516, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "pol_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576993, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "pol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525586, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pol_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 521205, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "pol_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547666, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "pol_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570974, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "pol_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 478162, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "pol_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 539441, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "pol_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 533791, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "pol_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 556242, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 
508, + "unique_sentence2": 1997 + }, + "pol_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564838, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "pol_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 389492, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "pol_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 411271, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "pol_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 537263, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "pol_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545757, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "pol_Latn-nld_Latn": { + 
"num_samples": 1997, + "number_of_characters": 569781, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "pol_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 555081, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "pol_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "pol_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "pol_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "pol_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + 
"unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "pol_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "pol_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "pol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "pol_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "pol_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, + "min_sentence1_length": 9, + 
"average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "pol_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "pol_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "pol_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "pol_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + "max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "pol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 139.14171256885328, + 
"max_sentence1_length": 468, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "por_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 508396, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "por_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 521017, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "por_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 560175, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "por_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 572865, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "por_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 576342, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "por_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 524935, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "por_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 520554, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "por_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 547015, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "por_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 570323, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "por_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 549201, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "por_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 477511, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "por_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 538790, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "por_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 555591, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "por_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 564187, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "por_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 566432, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + 
"unique_sentence2": 1996 + }, + "por_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 388841, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "por_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 410620, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "por_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 536612, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "por_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 554690, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "por_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 569130, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "por_Latn-pol_Latn": { + "num_samples": 
1997, + "number_of_characters": 555081, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "por_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "por_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 552108, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "por_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "por_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "por_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "por_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "por_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "por_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "por_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "por_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 534947, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
138.81572358537807, + "max_sentence1_length": 497, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "prs_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "prs_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 494903, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "prs_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490256, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "prs_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485875, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "prs_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442832, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + 
"unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "prs_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488482, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "prs_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456371, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "prs_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "prs_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "prs_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.45017526289435, + "max_sentence1_length": 365, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + 
"average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "pus_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 473814, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "pus_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 495000, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "pus_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 490353, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "pus_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 485972, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "pus_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 442929, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + 
"max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "pus_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 488579, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "pus_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 456468, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "pus_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 485169, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "pus_Arab-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "pus_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 121.49874812218327, + "max_sentence1_length": 366, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + 
"ron_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 575445, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "ron_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 540205, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ron_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 585593, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "ron_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 564471, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ron_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 581702, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "ron_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 
569960, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ron_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 569700, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "ron_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 146.4621932899349, + "max_sentence1_length": 518, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "rus_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 506074, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "rus_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530983, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 518695, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "rus_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 529803, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "rus_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544782, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 516211, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "rus_Cyrl-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 570543, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "rus_Cyrl-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 574020, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 
419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "rus_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 522613, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "rus_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 518232, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "rus_Cyrl-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 544693, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "rus_Cyrl-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 568001, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "rus_Cyrl-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 475189, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "rus_Cyrl-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 536468, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "rus_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 530818, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "rus_Cyrl-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 553269, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "rus_Cyrl-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 561865, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "rus_Cyrl-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 386519, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, 
+ "unique_sentence2": 1994 + }, + "rus_Cyrl-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 408298, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "rus_Cyrl-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 534290, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "rus_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 542784, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "rus_Cyrl-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 566808, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "rus_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 552759, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "rus_Cyrl-por_Latn": { + "num_samples": 1997, 
+ "number_of_characters": 552108, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "rus_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "rus_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "rus_Cyrl-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "rus_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "rus_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, + 
"min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "rus_Cyrl-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "rus_Cyrl-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "rus_Cyrl-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "rus_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "rus_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 
137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "rus_Cyrl-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 546014, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "rus_Cyrl-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "rus_Cyrl-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 137.6529794692038, + "max_sentence1_length": 419, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "shi_Arab-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 446094, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "shi_Arab-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 467280, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "shi_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 462633, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "shi_Arab-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 458252, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "shi_Arab-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 415209, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "shi_Arab-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 460859, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "shi_Arab-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 428748, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "shi_Arab-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 457449, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "shi_Arab-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 457546, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "shi_Arab-tgk_Cyrl": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 107.6179268903355, + "max_sentence1_length": 378, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 137.27941912869304, + "max_sentence2_length": 451, + "unique_sentence2": 1995 + }, + "sin_Sinh-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 502543, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "sin_Sinh-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 562589, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 
1996 + }, + "sin_Sinh-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 506461, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sin_Sinh-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 533944, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "sin_Sinh-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 503672, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "sin_Sinh-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 520316, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "sin_Sinh-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 524277, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "sin_Sinh-mar_Deva": { + "num_samples": 1997, + 
"number_of_characters": 519628, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "sin_Sinh-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 506964, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "sin_Sinh-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 509163, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "sin_Sinh-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "sin_Sinh-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "sin_Sinh-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, + 
"min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "sin_Sinh-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 129.56484727090637, + "max_sentence1_length": 441, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "slk_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 509059, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "slk_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507879, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "slk_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522858, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "slk_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494287, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 
126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "slk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500689, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "slk_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508894, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "slk_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520860, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "slk_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530835, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "slk_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527862, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + 
"unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "slk_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "slk_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "slk_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "slk_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 126.67451176765148, + "max_sentence1_length": 403, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "slv_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508986, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "slv_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507806, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "slv_Latn-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522785, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "slv_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 494214, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "slv_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500616, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "slv_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508821, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + 
"max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "slv_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520787, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "slv_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530762, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "slv_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527789, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "slv_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505865, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "slv_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + 
"slv_Latn-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "slv_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.63795693540311, + "max_sentence1_length": 463, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "smo_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 525575, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "smo_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 578360, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "smo_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 571275, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "smo_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 
564827, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "smo_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 582007, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "smo_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 598163, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "smo_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 551979, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "smo_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 555038, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "smo_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "smo_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 139.1362043064597, + "max_sentence1_length": 431, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "sna_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 596822, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "sna_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 546050, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sna_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 537308, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "sna_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 526810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + 
"max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "sna_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 602117, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "sna_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 596069, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "sna_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 582612, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "sna_Latn-ven_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, + "min_sentence1_length": 6, + "average_sentence1_length": 149.38908362543816, + "max_sentence1_length": 511, + "unique_sentence1": 1995, + "min_sentence2_length": 10, + "average_sentence2_length": 150.10315473209815, + "max_sentence2_length": 535, + "unique_sentence2": 1993 + }, + "snd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 464129, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + 
"min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "snd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 524175, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "snd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 468047, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "snd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 495530, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "snd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 465258, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "snd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 481902, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 
130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "snd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 485863, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "snd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 481214, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "snd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 468550, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "snd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 470749, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "snd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 479068, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + 
"unique_sentence2": 1996 + }, + "snd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "snd_Arab-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "snd_Arab-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 110.32899349023535, + "max_sentence1_length": 335, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "som_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 458799, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "som_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 539012, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "som_Latn-hau_Latn": { + "num_samples": 
1997, + "number_of_characters": 561258, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "som_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 537393, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "som_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 582791, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "som_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 528723, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "som_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "som_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, + 
"min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "som_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "som_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "som_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "som_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "som_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + 
"max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "som_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 145.8647971957937, + "max_sentence1_length": 455, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "spa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 519381, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "spa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 532002, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "spa_Latn-cat_Latn": { + "num_samples": 1997, + "number_of_characters": 571160, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 141.6925388082123, + "max_sentence2_length": 460, + "unique_sentence2": 1997 + }, + "spa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 583850, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + 
"min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "spa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 587327, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "spa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535920, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "spa_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 531539, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "spa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 558000, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "spa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 581308, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 
146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "spa_Latn-glg_Latn": { + "num_samples": 1997, + "number_of_characters": 560186, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 136.19729594391586, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "spa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 488496, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "spa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 549775, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "spa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 566576, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "spa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 575172, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 
+ }, + "spa_Latn-ita_Latn": { + "num_samples": 1997, + "number_of_characters": 577417, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 144.82573860791186, + "max_sentence2_length": 623, + "unique_sentence2": 1996 + }, + "spa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 399826, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "spa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 421605, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "spa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 547597, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "spa_Latn-mlt_Latn": { + "num_samples": 1997, + "number_of_characters": 565675, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 138.94591887831749, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "spa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 
580115, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "spa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 566066, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "spa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 565415, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "spa_Latn-ron_Latn": { + "num_samples": 1997, + "number_of_characters": 580685, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 146.4621932899349, + "max_sentence2_length": 518, + "unique_sentence2": 1997 + }, + "spa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 563093, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "spa_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, + "min_sentence1_length": 1, + 
"average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "spa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "spa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 598161, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "spa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "spa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "spa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 
504, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "spa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, + "min_sentence1_length": 1, + "average_sentence1_length": 144.3164747120681, + "max_sentence1_length": 504, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "sqi_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 582734, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "sqi_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 531327, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "sqi_Latn-hye_Armn": { + "num_samples": 1997, + "number_of_characters": 548322, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 132.55633450175262, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "sqi_Latn-kat_Geor": { + "num_samples": 1997, + "number_of_characters": 550199, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 142.01652478718077, + "max_sentence1_length": 461, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + 
"average_sentence2_length": 133.49624436654983, + "max_sentence2_length": 503, + "unique_sentence2": 1995 + }, + "srp_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508393, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "srp_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 507213, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "srp_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 522192, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "srp_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 493621, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "srp_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 500023, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + 
"max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "srp_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 508228, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "srp_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 520194, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "srp_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 530169, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "srp_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 527196, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "srp_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 505272, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + 
"srp_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 505199, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "srp_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "srp_Cyrl-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 126.34101151727592, + "max_sentence1_length": 439, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "srp_Latn-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 512231, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "srp_Latn-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 511051, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "srp_Latn-bul_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 526030, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "srp_Latn-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 497459, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "srp_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 503861, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "srp_Latn-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 512066, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "srp_Latn-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 524032, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "srp_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 534007, + "unique_pairs": 1996, + 
"min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "srp_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531034, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "srp_Latn-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 509110, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "srp_Latn-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 509037, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "srp_Latn-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 508444, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "srp_Latn-ukr_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 
128.26289434151226, + "max_sentence1_length": 452, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 131.58888332498748, + "max_sentence2_length": 440, + "unique_sentence2": 1996 + }, + "ssw_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 455649, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "ssw_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 535862, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ssw_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 558108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "ssw_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 534243, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "ssw_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 579641, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 
1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "ssw_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 525573, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "ssw_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 579434, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "ssw_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "ssw_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "ssw_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 
167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "ssw_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "ssw_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 556232, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "ssw_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "ssw_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 144.28743114672008, + "max_sentence1_length": 510, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "swa_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 440016, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 
1994 + }, + "swa_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 503690, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "swa_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 516311, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "swa_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 568159, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "swa_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 571636, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "swa_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 520229, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "swa_Latn-fas_Arab": { + "num_samples": 1997, + 
"number_of_characters": 515848, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "swa_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 542309, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "swa_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 565617, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "swa_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 542475, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "swa_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 472805, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "swa_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 534084, + "unique_pairs": 1997, + 
"min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "swa_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 550885, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "swa_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 518610, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "swa_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 559481, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "swa_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 384135, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "swa_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 405914, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 
136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "swa_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 531906, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "swa_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 564424, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "swa_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 564008, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "swa_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 509940, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "swa_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 550375, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "swa_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 549724, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "swa_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 547402, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "swa_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 563801, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "swa_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 560709, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "swa_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 560651, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + 
"average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "swa_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "swa_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "swa_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "swa_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "swa_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + 
"max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "swa_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swa_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "swa_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "swa_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "swa_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + 
"swa_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 136.45918878317477, + "max_sentence1_length": 430, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "swe_Latn-afr_Latn": { + "num_samples": 1997, + "number_of_characters": 520179, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 134.37756634952427, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swe_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 483008, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "swe_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 495629, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "swe_Latn-dan_Latn": { + "num_samples": 1997, + "number_of_characters": 503965, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 126.25838758137206, + "max_sentence2_length": 522, + "unique_sentence2": 1995 + }, + "swe_Latn-deu_Latn": { + "num_samples": 1997, + 
"number_of_characters": 547477, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "swe_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 550954, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "swe_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 499547, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "swe_Latn-fao_Latn": { + "num_samples": 1997, + "number_of_characters": 509630, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0951427140711, + "max_sentence2_length": 433, + "unique_sentence2": 1997 + }, + "swe_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 495166, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "swe_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 521627, + "unique_pairs": 1996, + 
"min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "swe_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 544935, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "swe_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 452123, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "swe_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 513402, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "swe_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 530203, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "swe_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 538799, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 
126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "swe_Latn-isl_Latn": { + "num_samples": 1997, + "number_of_characters": 514035, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 131.30095142714072, + "max_sentence2_length": 399, + "unique_sentence2": 1996 + }, + "swe_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 363453, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "swe_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 385232, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "swe_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 511224, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "swe_Latn-ltz_Latn": { + "num_samples": 1997, + "number_of_characters": 532584, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 
1996, + "min_sentence2_length": 7, + "average_sentence2_length": 140.58938407611416, + "max_sentence2_length": 543, + "unique_sentence2": 1996 + }, + "swe_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 543742, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "swe_Latn-nno_Latn": { + "num_samples": 1997, + "number_of_characters": 500184, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.36504757135704, + "max_sentence2_length": 417, + "unique_sentence2": 1996 + }, + "swe_Latn-nob_Latn": { + "num_samples": 1997, + "number_of_characters": 503271, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.91086629944917, + "max_sentence2_length": 482, + "unique_sentence2": 1996 + }, + "swe_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 529693, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "swe_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 529042, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "swe_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 526720, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "swe_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 540027, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "swe_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 524336, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "swe_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "swe_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + 
"unique_sentence2": 1997 + }, + "swe_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "swe_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "swe_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 126.10265398097145, + "max_sentence1_length": 430, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tah_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557343, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tah_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 610128, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "tah_Latn-fil_Latn": { + "num_samples": 
1997, + "number_of_characters": 603043, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "tah_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596595, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tah_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 613775, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "tah_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 629931, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "tah_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 583747, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "tah_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 586806, + "unique_pairs": 1997, + 
"min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "tah_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 587478, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "tah_Latn-ton_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 155.04406609914872, + "max_sentence1_length": 524, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 157.05558337506258, + "max_sentence2_length": 468, + "unique_sentence2": 1997 + }, + "tam_Taml-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 541142, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tam_Taml-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 553763, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tam_Taml-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 605611, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 
155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "tam_Taml-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 613809, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "tam_Taml-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 609088, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "tam_Taml-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 557681, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tam_Taml-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 585164, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "tam_Taml-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 553300, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + 
"unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tam_Taml-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 579761, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "tam_Taml-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 603069, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "tam_Taml-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 554892, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "tam_Taml-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 510257, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tam_Taml-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 571536, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + 
"average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tam_Taml-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 588337, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "tam_Taml-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 596933, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tam_Taml-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 421587, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "tam_Taml-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 575497, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "tam_Taml-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 443366, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + 
"max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "tam_Taml-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 569358, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "tam_Taml-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 570848, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "tam_Taml-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 558184, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tam_Taml-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 601876, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tam_Taml-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 560383, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + 
"tam_Taml-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 587827, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "tam_Taml-por_Latn": { + "num_samples": 1997, + "number_of_characters": 587176, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "tam_Taml-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 584854, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "tam_Taml-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 568702, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "tam_Taml-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 530288, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "tam_Taml-spa_Latn": { + "num_samples": 1997, + 
"number_of_characters": 598161, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "tam_Taml-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 582470, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tam_Taml-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 561788, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "tam_Taml-tel_Telu": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tam_Taml-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tam_Taml-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, + 
"min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "tam_Taml-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "tam_Taml-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "tam_Taml-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, + "min_sentence1_length": 11, + "average_sentence1_length": 155.21331997996995, + "max_sentence1_length": 581, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tat_Cyrl-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 515560, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tat_Cyrl-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 492252, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tat_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 493646, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tat_Cyrl-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 506202, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tat_Cyrl-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 496790, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tat_Cyrl-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "tat_Cyrl-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + 
"unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tat_Cyrl-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tat_Cyrl-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 123.14772158237356, + "max_sentence1_length": 539, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tel_Telu-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491329, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tel_Telu-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551375, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "tel_Telu-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495247, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + 
"average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tel_Telu-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 522730, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "tel_Telu-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492458, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "tel_Telu-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509102, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tel_Telu-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513063, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "tel_Telu-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508414, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + 
"max_sentence2_length": 443, + "unique_sentence2": 1995 + }, + "tel_Telu-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 495750, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "tel_Telu-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 497949, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "tel_Telu-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506268, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "tel_Telu-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 467854, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "tel_Telu-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557488, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + 
"tel_Telu-urd_Arab": { + "num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 123.9494241362043, + "max_sentence1_length": 412, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.18527791687531, + "max_sentence2_length": 390, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 505328, + "unique_pairs": 1995, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-ckb_Arab": { + "num_samples": 1997, + "number_of_characters": 526514, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 5, + "average_sentence2_length": 126.37305958938407, + "max_sentence2_length": 399, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 521867, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tgk_Cyrl-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 517486, + "unique_pairs": 1995, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tgk_Cyrl-heb_Hebr": { + "num_samples": 1997, + 
"number_of_characters": 474443, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-kmr_Latn": { + "num_samples": 1997, + "number_of_characters": 520093, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 7, + "average_sentence2_length": 123.15773660490736, + "max_sentence2_length": 420, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-mey_Arab": { + "num_samples": 1997, + "number_of_characters": 487982, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 6, + "average_sentence2_length": 107.07811717576365, + "max_sentence2_length": 392, + "unique_sentence2": 1993 + }, + "tgk_Cyrl-prs_Arab": { + "num_samples": 1997, + "number_of_characters": 516683, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.45017526289435, + "max_sentence2_length": 365, + "unique_sentence2": 1997 + }, + "tgk_Cyrl-pus_Arab": { + "num_samples": 1997, + "number_of_characters": 516780, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 8, + "average_sentence2_length": 121.49874812218327, + "max_sentence2_length": 366, + "unique_sentence2": 1996 + }, + "tgk_Cyrl-shi_Arab": { + "num_samples": 1997, + "number_of_characters": 489060, + "unique_pairs": 1997, + 
"min_sentence1_length": 3, + "average_sentence1_length": 137.27941912869304, + "max_sentence1_length": 451, + "unique_sentence1": 1995, + "min_sentence2_length": 3, + "average_sentence2_length": 107.6179268903355, + "max_sentence2_length": 378, + "unique_sentence2": 1996 + }, + "tha_Thai-bod_Tibt": { + "num_samples": 1997, + "number_of_characters": 538097, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 150.54031046569855, + "max_sentence2_length": 478, + "unique_sentence2": 1993 + }, + "tha_Thai-dzo_Tibt": { + "num_samples": 1997, + "number_of_characters": 480689, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 121.79318978467701, + "max_sentence2_length": 411, + "unique_sentence2": 1992 + }, + "tha_Thai-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 485188, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tha_Thai-khm_Khmr": { + "num_samples": 1997, + "number_of_characters": 525959, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 144.4621932899349, + "max_sentence2_length": 517, + "unique_sentence2": 1996 + }, + "tha_Thai-lao_Laoo": { + "num_samples": 1997, + "number_of_characters": 504448, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 
118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 133.69053580370556, + "max_sentence2_length": 507, + "unique_sentence2": 1997 + }, + "tha_Thai-mon_Mong": { + "num_samples": 1997, + "number_of_characters": 496516, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 129.7185778668002, + "max_sentence2_length": 414, + "unique_sentence2": 1997 + }, + "tha_Thai-mya_Mymr": { + "num_samples": 1997, + "number_of_characters": 549322, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 118.91236855282925, + "max_sentence1_length": 439, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 156.16124186279418, + "max_sentence2_length": 773, + "unique_sentence2": 1997 + }, + "tir_Ethi-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 332745, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "tir_Ethi-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 412958, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tir_Ethi-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 435204, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, 
+ "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "tir_Ethi-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 411339, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "tir_Ethi-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 456737, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "tir_Ethi-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 402669, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "tir_Ethi-som_Latn": { + "num_samples": 1997, + "number_of_characters": 456530, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "tir_Ethi-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 453380, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + 
"max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "tir_Ethi-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 437747, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tir_Ethi-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "tir_Ethi-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "tir_Ethi-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "tir_Ethi-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "tir_Ethi-zul_Latn": { 
+ "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, + "min_sentence1_length": 5, + "average_sentence1_length": 82.743114672008, + "max_sentence1_length": 272, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "ton_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 561360, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ton_Latn-fij_Latn": { + "num_samples": 1997, + "number_of_characters": 614145, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 150.478217325989, + "max_sentence2_length": 448, + "unique_sentence2": 1988 + }, + "ton_Latn-fil_Latn": { + "num_samples": 1997, + "number_of_characters": 607060, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 146.93039559339007, + "max_sentence2_length": 554, + "unique_sentence2": 1997 + }, + "ton_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 600612, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "ton_Latn-mal_Mlym": { + "num_samples": 1997, + "number_of_characters": 617792, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 152.30445668502753, + "max_sentence2_length": 540, + "unique_sentence2": 1996 + }, + "ton_Latn-mlg_Latn": { + "num_samples": 1997, + "number_of_characters": 633948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 160.39459188783175, + "max_sentence2_length": 559, + "unique_sentence2": 1994 + }, + "ton_Latn-mri_Latn": { + "num_samples": 1997, + "number_of_characters": 587764, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 137.26790185277918, + "max_sentence2_length": 443, + "unique_sentence2": 1997 + }, + "ton_Latn-msa_Latn": { + "num_samples": 1997, + "number_of_characters": 590823, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 138.79969954932398, + "max_sentence2_length": 463, + "unique_sentence2": 1997 + }, + "ton_Latn-smo_Latn": { + "num_samples": 1997, + "number_of_characters": 591495, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 139.1362043064597, + "max_sentence2_length": 431, + "unique_sentence2": 1996 + }, + "ton_Latn-tah_Latn": { + "num_samples": 1997, + "number_of_characters": 623263, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 157.05558337506258, + "max_sentence1_length": 468, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 155.04406609914872, + "max_sentence2_length": 524, + "unique_sentence2": 1997 + }, + "tsn_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 501790, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "tsn_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 582003, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tsn_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 604249, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "tsn_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 580384, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "tsn_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 625782, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + 
"max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "tsn_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 571714, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "tsn_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 625575, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "tsn_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 622425, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "tsn_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 606792, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tsn_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 499521, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + 
"min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "tsn_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "tsn_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "tsn_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "tsn_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 167.39258888332498, + "max_sentence1_length": 556, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "tuk_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 554908, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, 
+ "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tuk_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531600, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tuk_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 532994, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tuk_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 545550, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tuk_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 536138, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tuk_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 531200, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + 
"tuk_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "tuk_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tuk_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, + "min_sentence1_length": 9, + "average_sentence1_length": 142.85127691537306, + "max_sentence1_length": 576, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tur_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 496794, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "tur_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 535247, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "tur_Latn-bak_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 511939, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "tur_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 509415, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "tur_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 561263, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "tur_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 564740, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "tur_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 513333, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "tur_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 508952, + "unique_pairs": 1997, + 
"min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "tur_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 535413, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "tur_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 558721, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "tur_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 465909, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "tur_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 527188, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "tur_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 543989, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "tur_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 552585, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "tur_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 377239, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "tur_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 525889, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "tur_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 516477, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "tur_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 399018, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + 
"unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "tur_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 525010, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "tur_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 557528, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tur_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 543479, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "tur_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 542828, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "tur_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540506, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + 
"average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "tur_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 553813, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "tur_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 538122, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "tur_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 517440, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "tur_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 575574, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "tur_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 511539, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + 
"max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "tur_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 550887, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "tur_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "tur_Latn-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "tur_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "tur_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + 
"tur_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 133.00600901352027, + "max_sentence1_length": 504, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "uig_Arab-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 580656, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "uig_Arab-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 557348, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "uig_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 558742, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "uig_Arab-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 571298, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "uig_Arab-kir_Cyrl": { + "num_samples": 1997, + 
"number_of_characters": 561886, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "uig_Arab-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 556948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "uig_Arab-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 596296, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "uig_Arab-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 576635, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "uig_Arab-uzb_Latn": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 155.74461692538807, + "max_sentence1_length": 592, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 147.06810215322986, + "max_sentence2_length": 470, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bel_Cyrl": { + "num_samples": 1997, + "number_of_characters": 518873, + "unique_pairs": 1996, + 
"min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.2373560340511, + "max_sentence2_length": 422, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bos_Latn": { + "num_samples": 1997, + "number_of_characters": 517693, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 127.64646970455684, + "max_sentence2_length": 434, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-bul_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532672, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 135.14722083124687, + "max_sentence2_length": 493, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-ces_Latn": { + "num_samples": 1997, + "number_of_characters": 504101, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 120.84026039058588, + "max_sentence2_length": 474, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 510503, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-hrv_Latn": { + "num_samples": 1997, + "number_of_characters": 518708, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 
131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 128.1547320981472, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-mkd_Cyrl": { + "num_samples": 1997, + "number_of_characters": 530674, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.1467200801202, + "max_sentence2_length": 451, + "unique_sentence2": 1997 + }, + "ukr_Cyrl-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 540649, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 537676, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-slk_Latn": { + "num_samples": 1997, + "number_of_characters": 515752, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 126.67451176765148, + "max_sentence2_length": 403, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-slv_Latn": { + "num_samples": 1997, + "number_of_characters": 515679, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 
1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.63795693540311, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "ukr_Cyrl-srp_Cyrl": { + "num_samples": 1997, + "number_of_characters": 515086, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 126.34101151727592, + "max_sentence2_length": 439, + "unique_sentence2": 1995 + }, + "ukr_Cyrl-srp_Latn": { + "num_samples": 1997, + "number_of_characters": 518924, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 131.58888332498748, + "max_sentence1_length": 440, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 128.26289434151226, + "max_sentence2_length": 452, + "unique_sentence2": 1996 + }, + "urd_Arab-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 491800, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "urd_Arab-div_Thaa": { + "num_samples": 1997, + "number_of_characters": 551846, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 152.15222834251378, + "max_sentence2_length": 609, + "unique_sentence2": 1996 + }, + "urd_Arab-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 495718, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 
124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "urd_Arab-eus_Latn": { + "num_samples": 1997, + "number_of_characters": 523201, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 137.80821231847773, + "max_sentence2_length": 393, + "unique_sentence2": 1997 + }, + "urd_Arab-guj_Gujr": { + "num_samples": 1997, + "number_of_characters": 492929, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 122.64947421131697, + "max_sentence2_length": 378, + "unique_sentence2": 1997 + }, + "urd_Arab-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 509573, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "urd_Arab-kan_Knda": { + "num_samples": 1997, + "number_of_characters": 513534, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 132.96745117676514, + "max_sentence2_length": 449, + "unique_sentence2": 1996 + }, + "urd_Arab-mar_Deva": { + "num_samples": 1997, + "number_of_characters": 508885, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 130.63945918878318, + "max_sentence2_length": 443, + 
"unique_sentence2": 1995 + }, + "urd_Arab-nep_Deva": { + "num_samples": 1997, + "number_of_characters": 496221, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 124.29794692038057, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "urd_Arab-pan_Guru": { + "num_samples": 1997, + "number_of_characters": 498420, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 125.39909864797195, + "max_sentence2_length": 383, + "unique_sentence2": 1996 + }, + "urd_Arab-sin_Sinh": { + "num_samples": 1997, + "number_of_characters": 506739, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 129.56484727090637, + "max_sentence2_length": 441, + "unique_sentence2": 1996 + }, + "urd_Arab-snd_Arab": { + "num_samples": 1997, + "number_of_characters": 468325, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 110.32899349023535, + "max_sentence2_length": 335, + "unique_sentence2": 1996 + }, + "urd_Arab-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 557959, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "urd_Arab-tel_Telu": { + 
"num_samples": 1997, + "number_of_characters": 495525, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 124.18527791687531, + "max_sentence1_length": 390, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 123.9494241362043, + "max_sentence2_length": 412, + "unique_sentence2": 1996 + }, + "uzb_Latn-aze_Latn": { + "num_samples": 1997, + "number_of_characters": 563329, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 135.0195292939409, + "max_sentence2_length": 398, + "unique_sentence2": 1997 + }, + "uzb_Latn-bak_Cyrl": { + "num_samples": 1997, + "number_of_characters": 540021, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 123.34802203304957, + "max_sentence2_length": 437, + "unique_sentence2": 1995 + }, + "uzb_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 541415, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "uzb_Latn-kaz_Cyrl": { + "num_samples": 1997, + "number_of_characters": 553971, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 130.33350025037555, + "max_sentence2_length": 473, + "unique_sentence2": 1996 + }, + "uzb_Latn-kir_Cyrl": { + "num_samples": 1997, + "number_of_characters": 544559, + 
"unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 125.62043064596895, + "max_sentence2_length": 395, + "unique_sentence2": 1996 + }, + "uzb_Latn-tat_Cyrl": { + "num_samples": 1997, + "number_of_characters": 539621, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 123.14772158237356, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "uzb_Latn-tuk_Latn": { + "num_samples": 1997, + "number_of_characters": 578969, + "unique_pairs": 1996, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 142.85127691537306, + "max_sentence2_length": 576, + "unique_sentence2": 1996 + }, + "uzb_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 559308, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "uzb_Latn-uig_Arab": { + "num_samples": 1997, + "number_of_characters": 604717, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 147.06810215322986, + "max_sentence1_length": 470, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 155.74461692538807, + "max_sentence2_length": 592, + "unique_sentence2": 1996 + }, + "ven_Latn-bem_Latn": { + "num_samples": 1997, + "number_of_characters": 598248, + "unique_pairs": 1997, + "min_sentence1_length": 10, + 
"average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 8, + "average_sentence2_length": 149.47020530796195, + "max_sentence2_length": 465, + "unique_sentence2": 1997 + }, + "ven_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 547476, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "ven_Latn-ewe_Latn": { + "num_samples": 1997, + "number_of_characters": 538734, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 119.6685027541312, + "max_sentence2_length": 493, + "unique_sentence2": 1994 + }, + "ven_Latn-fuc_Latn": { + "num_samples": 1997, + "number_of_characters": 528236, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 114.4116174261392, + "max_sentence2_length": 376, + "unique_sentence2": 1996 + }, + "ven_Latn-kin_Latn": { + "num_samples": 1997, + "number_of_characters": 603543, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 152.12168252378567, + "max_sentence2_length": 541, + "unique_sentence2": 1996 + }, + "ven_Latn-nde_Latn": { + "num_samples": 1997, + "number_of_characters": 597495, + "unique_pairs": 1997, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + 
"max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.09313970956435, + "max_sentence2_length": 590, + "unique_sentence2": 1997 + }, + "ven_Latn-nya_Latn": { + "num_samples": 1997, + "number_of_characters": 584038, + "unique_pairs": 1996, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 10, + "average_sentence2_length": 142.35453179769655, + "max_sentence2_length": 464, + "unique_sentence2": 1993 + }, + "ven_Latn-sna_Latn": { + "num_samples": 1997, + "number_of_characters": 598086, + "unique_pairs": 1995, + "min_sentence1_length": 10, + "average_sentence1_length": 150.10315473209815, + "max_sentence1_length": 535, + "unique_sentence1": 1993, + "min_sentence2_length": 6, + "average_sentence2_length": 149.38908362543816, + "max_sentence2_length": 511, + "unique_sentence2": 1995 + }, + "vie_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 502302, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "vie_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 514923, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "vie_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 566771, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + 
"min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "vie_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 570248, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "vie_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 518841, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "vie_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 514460, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "vie_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 540921, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "vie_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 564229, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + 
"max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "vie_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 471417, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "vie_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 532696, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "vie_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 549497, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "vie_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 558093, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "vie_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 382747, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + 
"vie_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 404526, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "vie_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 530518, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "vie_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 563036, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "vie_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 548987, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "vie_Latn-por_Latn": { + "num_samples": 1997, + "number_of_characters": 548336, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "vie_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 546014, + 
"unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "vie_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 559321, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "vie_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 543630, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "vie_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 522948, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "vie_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 581082, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "vie_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 536734, + "unique_pairs": 1997, + "min_sentence1_length": 7, + 
"average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "vie_Latn-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "vie_Latn-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "vie_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 362599, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "vie_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 135.764146219329, + "max_sentence1_length": 437, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "wol_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 407310, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + 
"unique_sentence1": 1990, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "wol_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 487523, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "wol_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 509769, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "wol_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 485904, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "wol_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 531302, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "wol_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 477234, + "unique_pairs": 1992, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 9, + 
"average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "wol_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 531095, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "wol_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 527945, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "wol_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 512312, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "wol_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 405041, + "unique_pairs": 1996, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "wol_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 574086, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + 
"max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "wol_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "wol_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "wol_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 120.08162243365048, + "max_sentence1_length": 405, + "unique_sentence1": 1990, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "xho_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 435597, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "xho_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 515810, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + 
"xho_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 538056, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "xho_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 514191, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "xho_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 559589, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "xho_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 505521, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "xho_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 559382, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "xho_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 
556232, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "xho_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 540599, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "xho_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 433328, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "xho_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 602373, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "xho_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 507893, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "xho_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, + "min_sentence1_length": 6, + 
"average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "xho_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, + "min_sentence1_length": 6, + "average_sentence1_length": 134.2463695543315, + "max_sentence1_length": 492, + "unique_sentence1": 1997, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "yor_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 483595, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "yor_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 563808, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "yor_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 586054, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "yor_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 562189, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + 
"unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "yor_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 607587, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "yor_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 553519, + "unique_pairs": 1996, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "yor_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 607380, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "yor_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 604230, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "yor_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 588597, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + 
"average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "yor_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 481326, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "yor_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 650371, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "yor_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 555891, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "yor_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 584178, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "yor_Latn-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, + "min_sentence1_length": 7, + "average_sentence1_length": 158.2814221331998, + "max_sentence1_length": 582, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, 
+ "unique_sentence2": 1996 + }, + "yue_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 326607, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "yue_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 190513, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "yue_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 212292, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "yue_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 350008, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "yue_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "yue_Hant-zho_Hant": { + "num_samples": 
1997, + "number_of_characters": 170365, + "unique_pairs": 1996, + "min_sentence1_length": 4, + "average_sentence1_length": 39.502754131196795, + "max_sentence1_length": 133, + "unique_sentence1": 1996, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "zho_Hans-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 332681, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zho_Hans-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 196587, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zho_Hans-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 218366, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "zho_Hans-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 356082, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zho_Hans-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 163848, + "unique_pairs": 1997, + 
"min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "zho_Hans-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, + "min_sentence1_length": 4, + "average_sentence1_length": 42.54431647471207, + "max_sentence1_length": 263, + "unique_sentence1": 1997, + "min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + }, + "zho_Hant-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 322659, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "zho_Hant-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 335280, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "zho_Hant-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 387128, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "zho_Hant-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 390605, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, 
+ "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "zho_Hant-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 339198, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zho_Hant-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 334817, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "zho_Hant-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 361278, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "zho_Hant-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 384586, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "zho_Hant-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 291774, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + 
"min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "zho_Hant-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 353053, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "zho_Hant-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 369854, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "zho_Hant-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 378450, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "zho_Hant-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 203104, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zho_Hant-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 224883, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + 
"max_sentence2_length": 217, + "unique_sentence2": 1995 + }, + "zho_Hant-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 350875, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "zho_Hant-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 383393, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "zho_Hant-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 369344, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "zho_Hant-por_Latn": { + "num_samples": 1997, + "number_of_characters": 368693, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "zho_Hant-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 366371, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + 
"zho_Hant-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 379678, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "zho_Hant-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 363987, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "zho_Hant-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 343305, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "zho_Hant-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 401439, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "zho_Hant-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 357091, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "zho_Hant-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 
362599, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zho_Hant-yue_Hant": { + "num_samples": 1997, + "number_of_characters": 170365, + "unique_pairs": 1996, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 39.502754131196795, + "max_sentence2_length": 133, + "unique_sentence2": 1996 + }, + "zho_Hant-zho_Hans": { + "num_samples": 1997, + "number_of_characters": 176439, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 42.54431647471207, + "max_sentence2_length": 263, + "unique_sentence2": 1997 + }, + "zho_Hant-zul_Latn": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, + "min_sentence1_length": 3, + "average_sentence1_length": 45.80771156735103, + "max_sentence1_length": 200, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 129.0595893840761, + "max_sentence2_length": 494, + "unique_sentence2": 1996 + }, + "zul_Latn-amh_Ethi": { + "num_samples": 1997, + "number_of_characters": 425239, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 83.87931897846771, + "max_sentence2_length": 290, + "unique_sentence2": 1994 + }, + "zul_Latn-arb_Arab": { + "num_samples": 1997, + "number_of_characters": 488913, + "unique_pairs": 1997, + "min_sentence1_length": 8, + 
"average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 115.76414621932899, + "max_sentence2_length": 362, + "unique_sentence2": 1995 + }, + "zul_Latn-ben_Beng": { + "num_samples": 1997, + "number_of_characters": 501534, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 122.08412618928392, + "max_sentence2_length": 402, + "unique_sentence2": 1997 + }, + "zul_Latn-deu_Latn": { + "num_samples": 1997, + "number_of_characters": 553382, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 148.04707060590886, + "max_sentence2_length": 508, + "unique_sentence2": 1996 + }, + "zul_Latn-ell_Grek": { + "num_samples": 1997, + "number_of_characters": 556859, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 149.78818227341011, + "max_sentence2_length": 584, + "unique_sentence2": 1996 + }, + "zul_Latn-eng_Latn": { + "num_samples": 1997, + "number_of_characters": 505452, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 124.04606910365548, + "max_sentence2_length": 437, + "unique_sentence2": 1997 + }, + "zul_Latn-fas_Arab": { + "num_samples": 1997, + "number_of_characters": 501071, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 
494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 121.85227841762644, + "max_sentence2_length": 389, + "unique_sentence2": 1995 + }, + "zul_Latn-fin_Latn": { + "num_samples": 1997, + "number_of_characters": 527532, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.10265398097147, + "max_sentence2_length": 463, + "unique_sentence2": 1996 + }, + "zul_Latn-fra_Latn": { + "num_samples": 1997, + "number_of_characters": 550840, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.7741612418628, + "max_sentence2_length": 512, + "unique_sentence2": 1996 + }, + "zul_Latn-hau_Latn": { + "num_samples": 1997, + "number_of_characters": 527698, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 135.185778668002, + "max_sentence2_length": 483, + "unique_sentence2": 1997 + }, + "zul_Latn-heb_Hebr": { + "num_samples": 1997, + "number_of_characters": 458028, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 100.29844767150726, + "max_sentence2_length": 375, + "unique_sentence2": 1996 + }, + "zul_Latn-hin_Deva": { + "num_samples": 1997, + "number_of_characters": 519307, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + 
"average_sentence2_length": 130.9839759639459, + "max_sentence2_length": 394, + "unique_sentence2": 1996 + }, + "zul_Latn-hun_Latn": { + "num_samples": 1997, + "number_of_characters": 536108, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 139.3970956434652, + "max_sentence2_length": 508, + "unique_sentence2": 1997 + }, + "zul_Latn-ibo_Latn": { + "num_samples": 1997, + "number_of_characters": 503833, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 123.23535302954431, + "max_sentence2_length": 469, + "unique_sentence2": 1997 + }, + "zul_Latn-ind_Latn": { + "num_samples": 1997, + "number_of_characters": 544704, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 143.70155232849274, + "max_sentence2_length": 486, + "unique_sentence2": 1997 + }, + "zul_Latn-jpn_Jpan": { + "num_samples": 1997, + "number_of_characters": 369358, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 4, + "average_sentence2_length": 55.89684526790185, + "max_sentence2_length": 189, + "unique_sentence2": 1994 + }, + "zul_Latn-kor_Hang": { + "num_samples": 1997, + "number_of_characters": 391137, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 66.80270405608412, + "max_sentence2_length": 217, 
+ "unique_sentence2": 1995 + }, + "zul_Latn-lit_Latn": { + "num_samples": 1997, + "number_of_characters": 517129, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 129.893340010015, + "max_sentence2_length": 446, + "unique_sentence2": 1995 + }, + "zul_Latn-nld_Latn": { + "num_samples": 1997, + "number_of_characters": 549647, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 146.1767651477216, + "max_sentence2_length": 539, + "unique_sentence2": 1996 + }, + "zul_Latn-nso_Latn": { + "num_samples": 1997, + "number_of_characters": 549231, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 145.96845267901853, + "max_sentence2_length": 487, + "unique_sentence2": 1996 + }, + "zul_Latn-orm_Ethi": { + "num_samples": 1997, + "number_of_characters": 495163, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 118.89384076114172, + "max_sentence2_length": 466, + "unique_sentence2": 1984 + }, + "zul_Latn-pol_Latn": { + "num_samples": 1997, + "number_of_characters": 535598, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 9, + "average_sentence2_length": 139.14171256885328, + "max_sentence2_length": 468, + "unique_sentence2": 1996 + }, + "zul_Latn-por_Latn": { + "num_samples": 
1997, + "number_of_characters": 534947, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 138.81572358537807, + "max_sentence2_length": 497, + "unique_sentence2": 1996 + }, + "zul_Latn-rus_Cyrl": { + "num_samples": 1997, + "number_of_characters": 532625, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 137.6529794692038, + "max_sentence2_length": 419, + "unique_sentence2": 1996 + }, + "zul_Latn-som_Latn": { + "num_samples": 1997, + "number_of_characters": 549024, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 145.8647971957937, + "max_sentence2_length": 455, + "unique_sentence2": 1997 + }, + "zul_Latn-spa_Latn": { + "num_samples": 1997, + "number_of_characters": 545932, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 1, + "average_sentence2_length": 144.3164747120681, + "max_sentence2_length": 504, + "unique_sentence2": 1996 + }, + "zul_Latn-ssw_Latn": { + "num_samples": 1997, + "number_of_characters": 545874, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 144.28743114672008, + "max_sentence2_length": 510, + "unique_sentence2": 1996 + }, + "zul_Latn-swa_Latn": { + "num_samples": 1997, + "number_of_characters": 530241, + "unique_pairs": 1997, + 
"min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 10, + "average_sentence2_length": 136.45918878317477, + "max_sentence2_length": 430, + "unique_sentence2": 1997 + }, + "zul_Latn-swe_Latn": { + "num_samples": 1997, + "number_of_characters": 509559, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 8, + "average_sentence2_length": 126.10265398097145, + "max_sentence2_length": 430, + "unique_sentence2": 1996 + }, + "zul_Latn-tam_Taml": { + "num_samples": 1997, + "number_of_characters": 567693, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 11, + "average_sentence2_length": 155.21331997996995, + "max_sentence2_length": 581, + "unique_sentence2": 1997 + }, + "zul_Latn-tir_Ethi": { + "num_samples": 1997, + "number_of_characters": 422970, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 5, + "average_sentence2_length": 82.743114672008, + "max_sentence2_length": 272, + "unique_sentence2": 1996 + }, + "zul_Latn-tsn_Latn": { + "num_samples": 1997, + "number_of_characters": 592015, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 167.39258888332498, + "max_sentence2_length": 556, + "unique_sentence2": 1997 + }, + "zul_Latn-tur_Latn": { + "num_samples": 1997, + "number_of_characters": 523345, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, 
+ "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 133.00600901352027, + "max_sentence2_length": 504, + "unique_sentence2": 1997 + }, + "zul_Latn-vie_Latn": { + "num_samples": 1997, + "number_of_characters": 528853, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 135.764146219329, + "max_sentence2_length": 437, + "unique_sentence2": 1996 + }, + "zul_Latn-wol_Latn": { + "num_samples": 1997, + "number_of_characters": 497535, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 120.08162243365048, + "max_sentence2_length": 405, + "unique_sentence2": 1990 + }, + "zul_Latn-xho_Latn": { + "num_samples": 1997, + "number_of_characters": 525822, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 6, + "average_sentence2_length": 134.2463695543315, + "max_sentence2_length": 492, + "unique_sentence2": 1997 + }, + "zul_Latn-yor_Latn": { + "num_samples": 1997, + "number_of_characters": 573820, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + "min_sentence2_length": 7, + "average_sentence2_length": 158.2814221331998, + "max_sentence2_length": 582, + "unique_sentence2": 1996 + }, + "zul_Latn-zho_Hant": { + "num_samples": 1997, + "number_of_characters": 349210, + "unique_pairs": 1997, + "min_sentence1_length": 8, + "average_sentence1_length": 129.0595893840761, + "max_sentence1_length": 494, + "unique_sentence1": 1996, + 
"min_sentence2_length": 3, + "average_sentence2_length": 45.80771156735103, + "max_sentence2_length": 200, + "unique_sentence2": 1996 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json new file mode 100644 index 0000000000..754f13c767 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NollySentiBitextMining.json @@ -0,0 +1,69 @@ +{ + "train": { + "num_samples": 1640, + "number_of_characters": 445805, + "unique_pairs": 1632, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 3, + "average_sentence2_length": 135.515243902439, + "max_sentence2_length": 1728, + "unique_sentence2": 1631, + "hf_subset_descriptive_stats": { + "en-ha": { + "num_samples": 410, + "number_of_characters": 115348, + "unique_pairs": 407, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 4, + "average_sentence2_length": 145.01951219512196, + "max_sentence2_length": 1728, + "unique_sentence2": 407 + }, + "en-ig": { + "num_samples": 410, + "number_of_characters": 107173, + "unique_pairs": 409, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 5, + "average_sentence2_length": 125.08048780487805, + "max_sentence2_length": 1137, + "unique_sentence2": 408 + }, + "en-pcm": { + "num_samples": 410, + "number_of_characters": 109955, + "unique_pairs": 408, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 3, + "average_sentence2_length": 131.8658536585366, + "max_sentence2_length": 1552, + "unique_sentence2": 408 + }, + 
"en-yo": { + "num_samples": 410, + "number_of_characters": 113329, + "unique_pairs": 409, + "min_sentence1_length": 3, + "average_sentence1_length": 136.3170731707317, + "max_sentence1_length": 1698, + "unique_sentence1": 405, + "min_sentence2_length": 6, + "average_sentence2_length": 140.0951219512195, + "max_sentence2_length": 1338, + "unique_sentence2": 409 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json new file mode 100644 index 0000000000..96403e4c83 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/NorwegianCourtsBitextMining.json @@ -0,0 +1,15 @@ +{ + "test": { + "num_samples": 228, + "number_of_characters": 37441, + "unique_pairs": 228, + "min_sentence1_length": 13, + "average_sentence1_length": 82.19736842105263, + "max_sentence1_length": 272, + "unique_sentence1": 227, + "min_sentence2_length": 10, + "average_sentence2_length": 82.01754385964912, + "max_sentence2_length": 269, + "unique_sentence2": 226 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json index 60a8e055c4..9efdf2f8d7 100644 --- a/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json +++ b/mteb/descriptive_stats/BitextMining/NusaTranslationBitextMining.json @@ -1,75 +1,159 @@ { "train": { - "average_sentence1_length": 145.4552390438247, - "average_sentence2_length": 148.56607569721115, "num_samples": 50200, "number_of_characters": 14759870, + "unique_pairs": 50140, + "min_sentence1_length": 5, + "average_sentence1_length": 145.4552390438247, + "max_sentence1_length": 873, + "unique_sentence1": 8258, + "min_sentence2_length": 5, + "average_sentence2_length": 148.56607569721115, + "max_sentence2_length": 980, + "unique_sentence2": 50102, "hf_subset_descriptive_stats": { "ind-abs": { + 
"num_samples": 1000, + "number_of_characters": 295680, + "unique_pairs": 999, + "min_sentence1_length": 5, "average_sentence1_length": 148.366, + "max_sentence1_length": 727, + "unique_sentence1": 998, + "min_sentence2_length": 6, "average_sentence2_length": 147.314, - "num_samples": 1000, - "number_of_characters": 295680 + "max_sentence2_length": 629, + "unique_sentence2": 998 }, "ind-btk": { + "num_samples": 6600, + "number_of_characters": 1927907, + "unique_pairs": 6597, + "min_sentence1_length": 5, "average_sentence1_length": 145.36666666666667, + "max_sentence1_length": 873, + "unique_sentence1": 6521, + "min_sentence2_length": 5, "average_sentence2_length": 146.74045454545455, - "num_samples": 6600, - "number_of_characters": 1927907 + "max_sentence2_length": 980, + "unique_sentence2": 6596 }, "ind-bew": { + "num_samples": 6600, + "number_of_characters": 1939300, + "unique_pairs": 6595, + "min_sentence1_length": 5, "average_sentence1_length": 145.4280303030303, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 148.40530303030303, - "num_samples": 6600, - "number_of_characters": 1939300 + "max_sentence2_length": 840, + "unique_sentence2": 6590 }, "ind-bhp": { + "num_samples": 1000, + "number_of_characters": 261666, + "unique_pairs": 1000, + "min_sentence1_length": 11, "average_sentence1_length": 133.528, + "max_sentence1_length": 468, + "unique_sentence1": 999, + "min_sentence2_length": 10, "average_sentence2_length": 128.138, - "num_samples": 1000, - "number_of_characters": 261666 + "max_sentence2_length": 459, + "unique_sentence2": 999 }, "ind-jav": { + "num_samples": 6600, + "number_of_characters": 1922162, + "unique_pairs": 6594, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 5, "average_sentence2_length": 145.8089393939394, - "num_samples": 6600, - "number_of_characters": 
1922162 + "max_sentence2_length": 854, + "unique_sentence2": 6585 }, "ind-mad": { + "num_samples": 6600, + "number_of_characters": 1973257, + "unique_pairs": 6598, + "min_sentence1_length": 5, "average_sentence1_length": 145.35545454545453, + "max_sentence1_length": 873, + "unique_sentence1": 6521, + "min_sentence2_length": 5, "average_sentence2_length": 153.6228787878788, - "num_samples": 6600, - "number_of_characters": 1973257 + "max_sentence2_length": 827, + "unique_sentence2": 6592 }, "ind-mak": { + "num_samples": 6600, + "number_of_characters": 1953868, + "unique_pairs": 6594, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 150.6128787878788, - "num_samples": 6600, - "number_of_characters": 1953868 + "max_sentence2_length": 888, + "unique_sentence2": 6586 }, "ind-min": { + "num_samples": 6600, + "number_of_characters": 1937033, + "unique_pairs": 6595, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 6, "average_sentence2_length": 148.0621212121212, - "num_samples": 6600, - "number_of_characters": 1937033 + "max_sentence2_length": 837, + "unique_sentence2": 6591 }, "ind-mui": { + "num_samples": 1000, + "number_of_characters": 301448, + "unique_pairs": 1000, + "min_sentence1_length": 11, "average_sentence1_length": 150.454, + "max_sentence1_length": 451, + "unique_sentence1": 997, + "min_sentence2_length": 11, "average_sentence2_length": 150.994, - "num_samples": 1000, - "number_of_characters": 301448 + "max_sentence2_length": 450, + "unique_sentence2": 1000 }, "ind-rej": { + "num_samples": 1000, + "number_of_characters": 291205, + "unique_pairs": 1000, + "min_sentence1_length": 9, "average_sentence1_length": 151.622, + "max_sentence1_length": 873, + "unique_sentence1": 998, + "min_sentence2_length": 8, 
"average_sentence2_length": 139.583, - "num_samples": 1000, - "number_of_characters": 291205 + "max_sentence2_length": 784, + "unique_sentence2": 1000 }, "ind-sun": { + "num_samples": 6600, + "number_of_characters": 1956344, + "unique_pairs": 6591, + "min_sentence1_length": 5, "average_sentence1_length": 145.42772727272728, + "max_sentence1_length": 873, + "unique_sentence1": 6512, + "min_sentence2_length": 5, "average_sentence2_length": 150.9880303030303, - "num_samples": 6600, - "number_of_characters": 1956344 + "max_sentence2_length": 881, + "unique_sentence2": 6588 } } } diff --git a/mteb/descriptive_stats/BitextMining/PhincBitextMining.json b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json new file mode 100644 index 0000000000..f4b237d87d --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/PhincBitextMining.json @@ -0,0 +1,30 @@ +{ + "train": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "unique_sentence1": 13515, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "unique_sentence2": 13736, + "hf_subset_descriptive_stats": { + "eng-eng_hin": { + "num_samples": 13738, + "number_of_characters": 2069457, + "unique_pairs": 13737, + "min_sentence1_length": 1, + "average_sentence1_length": 74.02300189256079, + "max_sentence1_length": 278, + "unique_sentence1": 13515, + "min_sentence2_length": 3, + "average_sentence2_length": 76.61442713640996, + "max_sentence2_length": 274, + "unique_sentence2": 13736 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json new file mode 100644 index 0000000000..12f4003727 --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/TbilisiCityHallBitextMining.json @@ -0,0 +1,43 
@@ +{ + "test": { + "num_samples": 3640, + "number_of_characters": 572146, + "unique_pairs": 3640, + "min_sentence1_length": 13, + "average_sentence1_length": 78.59148351648352, + "max_sentence1_length": 203, + "unique_sentence1": 3636, + "min_sentence2_length": 13, + "average_sentence2_length": 78.59148351648352, + "max_sentence2_length": 203, + "unique_sentence2": 3636, + "hf_subset_descriptive_stats": { + "kat_Geor-eng_Latn": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, + "min_sentence1_length": 30, + "average_sentence1_length": 76.06593406593407, + "max_sentence1_length": 189, + "unique_sentence1": 1820, + "min_sentence2_length": 13, + "average_sentence2_length": 81.11703296703297, + "max_sentence2_length": 203, + "unique_sentence2": 1816 + }, + "eng_Latn-kat_Geor": { + "num_samples": 1820, + "number_of_characters": 286073, + "unique_pairs": 1820, + "min_sentence1_length": 13, + "average_sentence1_length": 81.11703296703297, + "max_sentence1_length": 203, + "unique_sentence1": 1816, + "min_sentence2_length": 30, + "average_sentence2_length": 76.06593406593407, + "max_sentence2_length": 189, + "unique_sentence2": 1820 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json new file mode 100644 index 0000000000..2d97df573e --- /dev/null +++ b/mteb/descriptive_stats/BitextMining/VieMedEVBitextMining.json @@ -0,0 +1,15 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 575910, + "unique_pairs": 2048, + "min_sentence1_length": 11, + "average_sentence1_length": 139.22802734375, + "max_sentence1_length": 1291, + "unique_sentence1": 2048, + "min_sentence2_length": 11, + "average_sentence2_length": 141.97802734375, + "max_sentence2_length": 1217, + "unique_sentence2": 2047 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/LanguageClassification.json 
b/mteb/descriptive_stats/Classification/LanguageClassification.json index cf8b83d5d1..6622d23be1 100644 --- a/mteb/descriptive_stats/Classification/LanguageClassification.json +++ b/mteb/descriptive_stats/Classification/LanguageClassification.json @@ -2,7 +2,11 @@ "test": { "num_samples": 2048, "number_of_characters": 224352, + "num_texts_in_train": 31, + "min_text_length": 14, "average_text_length": 109.546875, + "max_text_length": 1270, + "unique_text": 2025, "unique_labels": 20, "labels": { "17": { @@ -66,5 +70,77 @@ "count": 103 } } + }, + "train": { + "num_samples": 70000, + "number_of_characters": 7760299, + "num_texts_in_train": null, + "min_text_length": 2, + "average_text_length": 110.86141428571429, + "max_text_length": 2422, + "unique_text": 68978, + "unique_labels": 20, + "labels": { + "12": { + "count": 3500 + }, + "1": { + "count": 3500 + }, + "19": { + "count": 3500 + }, + "15": { + "count": 3500 + }, + "13": { + "count": 3500 + }, + "11": { + "count": 3500 + }, + "17": { + "count": 3500 + }, + "14": { + "count": 3500 + }, + "16": { + "count": 3500 + }, + "5": { + "count": 3500 + }, + "0": { + "count": 3500 + }, + "8": { + "count": 3500 + }, + "7": { + "count": 3500 + }, + "2": { + "count": 3500 + }, + "3": { + "count": 3500 + }, + "10": { + "count": 3500 + }, + "6": { + "count": 3500 + }, + "18": { + "count": 3500 + }, + "4": { + "count": 3500 + }, + "9": { + "count": 3500 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json index 23225ae223..63fcfd3e51 100644 --- a/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json +++ b/mteb/descriptive_stats/Classification/SlovakHateSpeechClassification.json @@ -2,7 +2,11 @@ "test": { "num_samples": 1319, "number_of_characters": 122279, + "num_texts_in_train": 46, + "min_text_length": 8, "average_text_length": 92.70583775587566, + 
"max_text_length": 1584, + "unique_text": 1315, "unique_labels": 2, "labels": { "1": { @@ -12,5 +16,23 @@ "count": 959 } } + }, + "train": { + "num_samples": 11870, + "number_of_characters": 1130860, + "num_texts_in_train": null, + "min_text_length": 7, + "average_text_length": 95.27042965459141, + "max_text_length": 2112, + "unique_text": 11655, + "unique_labels": 2, + "labels": { + "1": { + "count": 3245 + }, + "0": { + "count": 8625 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json index 8a5118e0c6..e6066a83c2 100644 --- a/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/ArXivHierarchicalClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 2065284, + "min_text_length": 103, "average_text_length": 1008.439453125, + "max_text_length": 2103, + "min_labels_per_text": 1, "average_labels_per_text": 1.46337890625, + "max_labels_per_text": 381, "unique_labels": 129, "labels": { "cs": { diff --git a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json index f1dda79201..2d9a0a01bb 100644 --- a/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json +++ b/mteb/descriptive_stats/Clustering/BiorxivClusteringS2S.json @@ -2,8 +2,13 @@ "test": { "num_samples": 10, "number_of_characters": 75000, + "min_text_length": 5000, "average_text_length": 7500.0, + "max_text_length": 10000, + "unique_texts": 41555, + "min_labels_per_text": 1, "average_labels_per_text": 7500.0, + "max_labels_per_text": 14251, "unique_labels": 26, "labels": { "neuroscience": { diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json new file mode 100644 index 0000000000..0370d5147e --- /dev/null +++ 
b/mteb/descriptive_stats/Clustering/MedrxivClusteringP2P.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 74294927, + "min_text_length": 148, + "average_text_length": 1981.1980533333333, + "max_text_length": 38759, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + }, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 
+ }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json new file mode 100644 index 0000000000..7b55ddd4dc --- /dev/null +++ b/mteb/descriptive_stats/Clustering/MedrxivClusteringS2S.v2.json @@ -0,0 +1,168 @@ +{ + "test": { + "num_samples": 37500, + "number_of_characters": 4301276, + "min_text_length": 18, + "average_text_length": 114.70069333333333, + "max_text_length": 339, + "min_labels_per_text": 6, + "average_labels_per_text": 1.0, + "max_labels_per_text": 8830, + "unique_labels": 51, + "labels": { + "epidemiology": { + "count": 6656 + }, + "public and global health": { + "count": 3595 + }, + "oncology": { + "count": 845 + }, + "allergy and immunology": { + "count": 464 + }, + "orthopedics": { + "count": 104 + }, + "health informatics": { + "count": 1107 + }, + "occupational and environmental health": { + "count": 415 + }, + "infectious diseases": { + "count": 8830 + }, + "genetic and genomic medicine": { + "count": 1918 + }, + "health policy": { + "count": 527 + }, + "gastroenterology": { + "count": 343 + }, + "radiology and imaging": { + "count": 541 + }, + "pain medicine": { + "count": 121 + }, + "neurology": { + "count": 1773 + 
}, + "primary care research": { + "count": 232 + }, + "rheumatology": { + "count": 189 + }, + "endocrinology": { + "count": 419 + }, + "hematology": { + "count": 202 + }, + "addiction medicine": { + "count": 178 + }, + "pediatrics": { + "count": 589 + }, + "cardiovascular medicine": { + "count": 855 + }, + "obstetrics and gynecology": { + "count": 373 + }, + "health systems and quality improvement": { + "count": 491 + }, + "nephrology": { + "count": 241 + }, + "respiratory medicine": { + "count": 482 + }, + "geriatric medicine": { + "count": 169 + }, + "dentistry and oral medicine": { + "count": 159 + }, + "psychiatry and clinical psychology": { + "count": 1781 + }, + "nutrition": { + "count": 240 + }, + "intensive care and critical care medicine": { + "count": 368 + }, + "rehabilitation medicine and physical therapy": { + "count": 322 + }, + "otolaryngology": { + "count": 166 + }, + "nursing": { + "count": 93 + }, + "transplantation": { + "count": 118 + }, + "health economics": { + "count": 327 + }, + "sports medicine": { + "count": 180 + }, + "hiv aids": { + "count": 363 + }, + "dermatology": { + "count": 98 + }, + "pathology": { + "count": 223 + }, + "emergency medicine": { + "count": 191 + }, + "pharmacology and therapeutics": { + "count": 221 + }, + "ophthalmology": { + "count": 220 + }, + "medical ethics": { + "count": 46 + }, + "palliative medicine": { + "count": 45 + }, + "sexual and reproductive health": { + "count": 156 + }, + "medical education": { + "count": 203 + }, + "surgery": { + "count": 162 + }, + "urology": { + "count": 65 + }, + "anesthesia": { + "count": 72 + }, + "toxicology": { + "count": 16 + }, + "forensic medicine": { + "count": 6 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json new file mode 100644 index 0000000000..ba997dbefc --- /dev/null +++ b/mteb/descriptive_stats/Clustering/RedditClusteringP2P.v2.json 
@@ -0,0 +1,1335 @@ +{ + "test": { + "num_samples": 459389, + "number_of_characters": 334286895, + "min_text_length": 79, + "average_text_length": 727.6771864367671, + "max_text_length": 4359, + "min_labels_per_text": 2, + "average_labels_per_text": 1.0, + "max_labels_per_text": 77908, + "unique_labels": 440, + "labels": { + "FortNiteBR": { + "count": 436 + }, + "buildapc": { + "count": 8484 + }, + "offmychest": { + "count": 570 + }, + "nus": { + "count": 45 + }, + "relationship_advice": { + "count": 16651 + }, + "premed": { + "count": 201 + }, + "dogecoin": { + "count": 8108 + }, + "GamingLaptops": { + "count": 183 + }, + "asktransgender": { + "count": 326 + }, + "MachineLearning": { + "count": 61 + }, + "puppy101": { + "count": 1597 + }, + "GunAccessoriesForSale": { + "count": 2619 + }, + "Random_Acts_Of_Amazon": { + "count": 1115 + }, + "Catholicism": { + "count": 183 + }, + "MonsterHunter": { + "count": 218 + }, + "tipofmypenis": { + "count": 87 + }, + "samsung": { + "count": 69 + }, + "PersonalFinanceCanada": { + "count": 341 + }, + "Dyson_Sphere_Program": { + "count": 55 + }, + "bleach": { + "count": 41 + }, + "AmItheAsshole": { + "count": 3730 + }, + "WallStreetbetsELITE": { + "count": 328 + }, + "GlobalPowers": { + "count": 35 + }, + "ABraThatFits": { + "count": 159 + }, + "PokemonGoFriends": { + "count": 1165 + }, + "NoMansSkyTheGame": { + "count": 259 + }, + "masseffect": { + "count": 233 + }, + "dating_advice": { + "count": 559 + }, + "yoga": { + "count": 50 + }, + "depression": { + "count": 515 + }, + "COVID19positive": { + "count": 180 + }, + "generationology": { + "count": 37 + }, + "feedthebeast": { + "count": 192 + }, + "EliteDangerous": { + "count": 270 + }, + "alcoholicsanonymous": { + "count": 93 + }, + "GoRVing": { + "count": 35 + }, + "thedivision": { + "count": 111 + }, + "breakingmom": { + "count": 105 + }, + "AskAnAmerican": { + "count": 80 + }, + "HypnoFair": { + "count": 5 + }, + "JustUnsubbed": { + "count": 13 + }, + "socialanxiety": { + 
"count": 123 + }, + "dirtykikpals": { + "count": 202 + }, + "askTO": { + "count": 126 + }, + "AskCulinary": { + "count": 108 + }, + "Bogleheads": { + "count": 71 + }, + "dragonquest": { + "count": 45 + }, + "NoContract": { + "count": 30 + }, + "gorillaz": { + "count": 14 + }, + "MondoGore": { + "count": 8 + }, + "comicswap": { + "count": 56 + }, + "VirtualYoutubers": { + "count": 92 + }, + "Gta5Modding": { + "count": 28 + }, + "obs": { + "count": 61 + }, + "vcu": { + "count": 9 + }, + "KingkillerChronicle": { + "count": 17 + }, + "AmongUs": { + "count": 41 + }, + "wireshark": { + "count": 3 + }, + "Dodocodes": { + "count": 46 + }, + "Aliexpress": { + "count": 40 + }, + "LearnerDriverUK": { + "count": 12 + }, + "PanicAttack": { + "count": 23 + }, + "KassadinMains": { + "count": 10 + }, + "islam": { + "count": 93 + }, + "chronotrigger": { + "count": 4 + }, + "skincareexchange": { + "count": 13 + }, + "PokemonHome": { + "count": 21 + }, + "survivinginfidelity": { + "count": 71 + }, + "igcse": { + "count": 21 + }, + "C25K": { + "count": 21 + }, + "aorus": { + "count": 2 + }, + "idleon": { + "count": 19 + }, + "photography": { + "count": 22 + }, + "cryptocoins": { + "count": 7 + }, + "CanaryWharfBets": { + "count": 7 + }, + "KillingEve": { + "count": 7 + }, + "GameBuilderGarage": { + "count": 16 + }, + "SauceSharingCommunity": { + "count": 7 + }, + "turo": { + "count": 9 + }, + "foodscience": { + "count": 14 + }, + "HIMYM": { + "count": 20 + }, + "HauntingOfHillHouse": { + "count": 4 + }, + "GoodNotes": { + "count": 8 + }, + "RedditWritesSeinfeld": { + "count": 6 + }, + "AirReps": { + "count": 2 + }, + "ADHD": { + "count": 3811 + }, + "BuddyCrossing": { + "count": 446 + }, + "libraryofruina": { + "count": 98 + }, + "SluttyConfessions": { + "count": 2787 + }, + "tipofmytongue": { + "count": 7145 + }, + "fleshlight": { + "count": 128 + }, + "amcstock": { + "count": 13910 + }, + "teenagers": { + "count": 77908 + }, + "suggestmeabook": { + "count": 1540 + }, + 
"dirtypenpals": { + "count": 5587 + }, + "MinecraftServer": { + "count": 177 + }, + "CreditCards": { + "count": 669 + }, + "Guitar": { + "count": 10952 + }, + "rpg": { + "count": 529 + }, + "NoFap": { + "count": 14853 + }, + "lfg": { + "count": 1093 + }, + "MarsWallStreet": { + "count": 935 + }, + "SummonSign": { + "count": 931 + }, + "AssassinsCreedValhala": { + "count": 295 + }, + "hoi4": { + "count": 432 + }, + "Coins4Sale": { + "count": 260 + }, + "xbox": { + "count": 459 + }, + "TooAfraidToAsk": { + "count": 7404 + }, + "NBA2k": { + "count": 553 + }, + "KGBTR": { + "count": 943 + }, + "roblox": { + "count": 220 + }, + "salesforce": { + "count": 214 + }, + "TwoXChromosomes": { + "count": 1736 + }, + "mechmarket": { + "count": 4863 + }, + "Gaming_Headsets": { + "count": 103 + }, + "pittsburgh": { + "count": 189 + }, + "CryptoMars": { + "count": 1606 + }, + "FridayNightFunkin": { + "count": 378 + }, + "vaginismus": { + "count": 122 + }, + "transpositive": { + "count": 10 + }, + "comicbooks": { + "count": 274 + }, + "BDSMcommunity": { + "count": 185 + }, + "aliens": { + "count": 201 + }, + "Scotch": { + "count": 64 + }, + "KikRoleplay": { + "count": 141 + }, + "Kayaking": { + "count": 91 + }, + "196": { + "count": 47 + }, + "digimon": { + "count": 140 + }, + "Evernote": { + "count": 42 + }, + "logh": { + "count": 22 + }, + "arlington": { + "count": 15 + }, + "Adopted": { + "count": 8 + }, + "DissonautUniverse": { + "count": 4 + }, + "Midsommar": { + "count": 12 + }, + "SofiawithanF": { + "count": 83 + }, + "xmpp": { + "count": 6 + }, + "ZombsRoyale": { + "count": 16 + }, + "accesscontrol": { + "count": 8 + }, + "WetlanderHumor": { + "count": 2 + }, + "PoonamPandeyFanatics": { + "count": 2 + }, + "screenplaychallenge": { + "count": 2 + }, + "scatstories": { + "count": 2 + }, + "techsupport": { + "count": 290 + }, + "whatcarshouldIbuy": { + "count": 79 + }, + "Stormlight_Archive": { + "count": 15 + }, + "deadbydaylight": { + "count": 126 + }, + "bicycling": { + 
"count": 27 + }, + "oculus": { + "count": 64 + }, + "Cartalk": { + "count": 33 + }, + "Sims4": { + "count": 43 + }, + "NoFeeAC": { + "count": 95 + }, + "Crypto_com": { + "count": 37 + }, + "ITCareerQuestions": { + "count": 259 + }, + "aromantic": { + "count": 18 + }, + "Revu": { + "count": 3 + }, + "exalted": { + "count": 2 + }, + "HilariaBaldwin": { + "count": 20 + }, + "Testosterone": { + "count": 35 + }, + "Screenwriting": { + "count": 170 + }, + "LifeProTips": { + "count": 49 + }, + "steinsgate": { + "count": 13 + }, + "Baystreetbets": { + "count": 10 + }, + "AskGirls": { + "count": 7 + }, + "idlechampions": { + "count": 7 + }, + "facebook": { + "count": 17 + }, + "tf2trade": { + "count": 4 + }, + "mfdoom": { + "count": 3 + }, + "FiddlesticksMains": { + "count": 2 + }, + "HFY": { + "count": 10 + }, + "FiestaST": { + "count": 2 + }, + "whatsthatbook": { + "count": 994 + }, + "GearsOfWar": { + "count": 879 + }, + "KazuhaMains": { + "count": 175 + }, + "RepTime": { + "count": 211 + }, + "AstroGaming": { + "count": 141 + }, + "metalgearsolid": { + "count": 152 + }, + "qBittorrent": { + "count": 39 + }, + "ELLIPAL_Official": { + "count": 24 + }, + "raisedbynarcissists": { + "count": 4895 + }, + "unpopularopinion": { + "count": 14901 + }, + "ACTrade": { + "count": 5679 + }, + "askcarsales": { + "count": 1339 + }, + "AskVet": { + "count": 1357 + }, + "whowouldwin": { + "count": 4493 + }, + "playstation": { + "count": 1362 + }, + "anime": { + "count": 6531 + }, + "GME": { + "count": 12577 + }, + "DotA2": { + "count": 2004 + }, + "cryptostreetbets": { + "count": 2241 + }, + "MonsterHunterWorld": { + "count": 698 + }, + "Market76": { + "count": 14274 + }, + "DnD": { + "count": 5092 + }, + "leagueoflegends": { + "count": 3683 + }, + "doordash_drivers": { + "count": 1626 + }, + "theta_network": { + "count": 489 + }, + "exmuslim": { + "count": 1369 + }, + "gonewildaudio": { + "count": 2998 + }, + "conspiracy": { + "count": 3587 + }, + "heroesofthestorm": { + "count": 535 + 
}, + "FanFiction": { + "count": 2782 + }, + "Doom": { + "count": 1251 + }, + "texas": { + "count": 269 + }, + "Vent": { + "count": 1738 + }, + "selfimprovement": { + "count": 1284 + }, + "youtubers": { + "count": 706 + }, + "askseddit": { + "count": 237 + }, + "boardgames": { + "count": 1237 + }, + "bravelydefault": { + "count": 347 + }, + "ConquerorsBlade": { + "count": 238 + }, + "ChronicPain": { + "count": 527 + }, + "teenagersnew": { + "count": 256 + }, + "brasil": { + "count": 1092 + }, + "MatthiasSubmissions": { + "count": 921 + }, + "MarylandUnemployment": { + "count": 314 + }, + "SaltLakeCity": { + "count": 411 + }, + "BokunoheroFanfiction": { + "count": 155 + }, + "BenignExistence": { + "count": 125 + }, + "GayYoungOldDating": { + "count": 156 + }, + "Bible": { + "count": 202 + }, + "haskell": { + "count": 154 + }, + "seduction": { + "count": 400 + }, + "fantasywriters": { + "count": 262 + }, + "HiveOS": { + "count": 100 + }, + "PerkByDaylight": { + "count": 15 + }, + "Hedgehog": { + "count": 73 + }, + "xmen": { + "count": 263 + }, + "HyperRP": { + "count": 122 + }, + "emotestories": { + "count": 3 + }, + "tutanota": { + "count": 135 + }, + "CultoftheFranklin": { + "count": 46 + }, + "langrisser": { + "count": 62 + }, + "CozyGrove": { + "count": 61 + }, + "Sverigesforsvarsmakt": { + "count": 12 + }, + "silverbugbets": { + "count": 21 + }, + "WreckingBallMains": { + "count": 5 + }, + "capitalism_in_decay": { + "count": 8 + }, + "paintdotnet": { + "count": 11 + }, + "u_mawadom118": { + "count": 4 + }, + "xboxfindfriends": { + "count": 2 + }, + "CPTSD": { + "count": 540 + }, + "destiny2": { + "count": 318 + }, + "Wallstreetsilver": { + "count": 1013 + }, + "DestinyTheGame": { + "count": 1107 + }, + "blackopscoldwar": { + "count": 400 + }, + "InstacartShoppers": { + "count": 202 + }, + "RocketLeagueExchange": { + "count": 832 + }, + "apexlegends": { + "count": 3265 + }, + "kansascity": { + "count": 53 + }, + "namenerds": { + "count": 235 + }, + "help": { + 
"count": 152 + }, + "Kengan_Ashura": { + "count": 132 + }, + "thetagang": { + "count": 165 + }, + "GameSale": { + "count": 262 + }, + "Reduction": { + "count": 109 + }, + "sex": { + "count": 906 + }, + "bostonr4r": { + "count": 75 + }, + "LegendsOfRuneterra": { + "count": 231 + }, + "overlord": { + "count": 48 + }, + "madisonwi": { + "count": 53 + }, + "steelseries": { + "count": 79 + }, + "ClashOfClansRecruit": { + "count": 214 + }, + "CharacterRant": { + "count": 55 + }, + "AirForce": { + "count": 94 + }, + "sexstories": { + "count": 92 + }, + "NameThatSong": { + "count": 162 + }, + "depressed": { + "count": 74 + }, + "ibs": { + "count": 150 + }, + "40kLore": { + "count": 269 + }, + "podcasts": { + "count": 88 + }, + "miraculousladybug": { + "count": 150 + }, + "ask": { + "count": 224 + }, + "EverMerge": { + "count": 31 + }, + "TMJ": { + "count": 54 + }, + "BitLifeApp": { + "count": 39 + }, + "FireEmblemHeroes": { + "count": 100 + }, + "software": { + "count": 62 + }, + "ShieldAndroidTV": { + "count": 70 + }, + "GriefSupport": { + "count": 125 + }, + "onewheel": { + "count": 37 + }, + "MensRights": { + "count": 80 + }, + "nhl": { + "count": 22 + }, + "ClashOfClans": { + "count": 107 + }, + "ps3homebrew": { + "count": 33 + }, + "LightNovels": { + "count": 77 + }, + "redsox": { + "count": 34 + }, + "CryptoMarkets": { + "count": 44 + }, + "ugly": { + "count": 47 + }, + "GCXRep": { + "count": 12 + }, + "cscareerquestionsEU": { + "count": 65 + }, + "MindHunter": { + "count": 6 + }, + "starcraft2coop": { + "count": 15 + }, + "nanocurrency": { + "count": 1421 + }, + "ModelCars": { + "count": 8 + }, + "UKJobs": { + "count": 30 + }, + "Netherlands": { + "count": 44 + }, + "clonewars": { + "count": 8 + }, + "Julia": { + "count": 11 + }, + "Prolactinoma": { + "count": 9 + }, + "sofi": { + "count": 11 + }, + "royalfamily": { + "count": 6 + }, + "ConnecticutR4R": { + "count": 8 + }, + "weather": { + "count": 5 + }, + "oneui": { + "count": 7 + }, + "KTM": { + "count": 5 + }, + 
"Aerials": { + "count": 3 + }, + "seoul": { + "count": 2 + }, + "exjw": { + "count": 3281 + }, + "ModernMagic": { + "count": 699 + }, + "Paladins": { + "count": 1242 + }, + "kdramarecommends": { + "count": 1611 + }, + "hitbtc": { + "count": 330 + }, + "endocrinology": { + "count": 75 + }, + "Bath": { + "count": 43 + }, + "NassauCountyHookups": { + "count": 5 + }, + "feminineboys": { + "count": 1248 + }, + "dreamsmp": { + "count": 2018 + }, + "SquaredCircle": { + "count": 2255 + }, + "Minecraft": { + "count": 8753 + }, + "spirituality": { + "count": 1809 + }, + "Eldenring": { + "count": 1471 + }, + "Sat": { + "count": 1172 + }, + "bonnaroo": { + "count": 194 + }, + "gardening": { + "count": 1892 + }, + "Unemployment": { + "count": 6185 + }, + "mac": { + "count": 1847 + }, + "Bestbuy": { + "count": 437 + }, + "quittingkratom": { + "count": 1081 + }, + "lawschooladmissions": { + "count": 3436 + }, + "NiceHash": { + "count": 2135 + }, + "McMaster": { + "count": 815 + }, + "covidlonghaulers": { + "count": 1299 + }, + "stalker": { + "count": 758 + }, + "MLBTheShow": { + "count": 2721 + }, + "FortniteCompetitive": { + "count": 998 + }, + "dpdr": { + "count": 514 + }, + "appliancerepair": { + "count": 720 + }, + "thomasthetankengine": { + "count": 207 + }, + "delhi": { + "count": 217 + }, + "Huel": { + "count": 300 + }, + "leafs": { + "count": 203 + }, + "HotWheels": { + "count": 170 + }, + "90dayfianceuncensored": { + "count": 550 + }, + "Throwers": { + "count": 142 + }, + "Wavyhair": { + "count": 270 + }, + "CryptoHorde": { + "count": 128 + }, + "ShuumatsuNoValkyrie": { + "count": 453 + }, + "TeensMeetTeens": { + "count": 432 + }, + "dbrand": { + "count": 108 + }, + "SLFmeetups": { + "count": 18 + }, + "1200isplentyketo": { + "count": 48 + }, + "passive_income": { + "count": 211 + }, + "BroadCity": { + "count": 16 + }, + "RevenantMain": { + "count": 71 + }, + "extrarfl": { + "count": 25 + }, + "AgonGame": { + "count": 5 + }, + "FitnessDE": { + "count": 3 + }, + "gaming": 
{ + "count": 1277 + }, + "livesound": { + "count": 91 + }, + "IBO": { + "count": 1896 + }, + "EscapefromTarkov": { + "count": 1300 + }, + "amex": { + "count": 145 + }, + "DMAcademy": { + "count": 1411 + }, + "VinylCollectors": { + "count": 556 + }, + "cardano": { + "count": 716 + }, + "brave_browser": { + "count": 159 + }, + "dating": { + "count": 952 + }, + "OculusQuest": { + "count": 942 + }, + "Superstonk": { + "count": 3089 + }, + "MtF": { + "count": 957 + }, + "findaleague": { + "count": 207 + }, + "Nioh": { + "count": 398 + }, + "IRS": { + "count": 715 + }, + "transgendercirclejerk": { + "count": 353 + }, + "learnmath": { + "count": 489 + }, + "piano": { + "count": 263 + }, + "LeagueConnect": { + "count": 216 + }, + "eu4": { + "count": 561 + }, + "Wordpress": { + "count": 345 + }, + "RoleplayingForReddit": { + "count": 31 + }, + "LOONA": { + "count": 89 + }, + "newtothenavy": { + "count": 167 + }, + "HaircareScience": { + "count": 118 + }, + "appletv": { + "count": 167 + }, + "sissypersonals": { + "count": 102 + }, + "raleigh": { + "count": 168 + }, + "realonlyfansreviews": { + "count": 21 + }, + "AskGames": { + "count": 49 + }, + "PokemonTCG": { + "count": 325 + }, + "controlgame": { + "count": 109 + }, + "GoogleDataStudio": { + "count": 16 + }, + "WhiteWolfRPG": { + "count": 139 + }, + "MECoOp": { + "count": 31 + }, + "snuffrp": { + "count": 46 + }, + "lockpicking": { + "count": 103 + }, + "wicked_edge": { + "count": 105 + }, + "BMW": { + "count": 99 + }, + "choiceofgames": { + "count": 24 + }, + "hisdarkmaterials": { + "count": 12 + }, + "SakuraGakuin": { + "count": 24 + }, + "detrans": { + "count": 55 + }, + "Smallville": { + "count": 37 + }, + "kingofqueens": { + "count": 7 + }, + "JamesHoffmann": { + "count": 22 + }, + "stashinvest": { + "count": 16 + }, + "ABA": { + "count": 79 + }, + "ladybusiness": { + "count": 10 + }, + "gamegrumps": { + "count": 32 + }, + "GodEater": { + "count": 21 + }, + "tomorrow": { + "count": 39 + }, + "Tomorrowland": { + 
"count": 9 + }, + "BlackCountryNewRoad": { + "count": 5 + }, + "STAYC": { + "count": 3 + }, + "SatoshiStreetBets": { + "count": 3828 + }, + "AskLosAngeles": { + "count": 1036 + }, + "buildapcforme": { + "count": 1689 + }, + "ApplyingToCollege": { + "count": 10675 + }, + "watercooling": { + "count": 1209 + }, + "BreakUps": { + "count": 4914 + }, + "FIFA": { + "count": 3811 + }, + "emacs": { + "count": 712 + }, + "trakstocks": { + "count": 691 + }, + "Shittyaskflying": { + "count": 147 + }, + "AmazonFC": { + "count": 1178 + }, + "stocks": { + "count": 4610 + }, + "BangaloreMains": { + "count": 26 + }, + "pokemon": { + "count": 3953 + }, + "religion": { + "count": 684 + }, + "cuboulder": { + "count": 269 + }, + "self": { + "count": 1688 + }, + "tarot": { + "count": 912 + }, + "turtles": { + "count": 49 + }, + "TheMagnusArchives": { + "count": 300 + }, + "Superhero_Ideas": { + "count": 34 + }, + "NTU": { + "count": 308 + }, + "touhou": { + "count": 623 + }, + "JoJolion": { + "count": 50 + }, + "lasers": { + "count": 27 + }, + "popperpigs": { + "count": 67 + }, + "aggretsuko": { + "count": 20 + }, + "Library": { + "count": 5 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json index 9eff1b40d4..126cd893bc 100644 --- a/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/RuSciBenchGRNTIClusteringP2P.json @@ -2,8 +2,12 @@ "test": { "num_samples": 2048, "number_of_characters": 1822339, + "min_text_length": 84, "average_text_length": 889.81396484375, + "max_text_length": 3143, + "min_labels_per_text": 73, "average_labels_per_text": 1.0, + "max_labels_per_text": 74, "unique_labels": 28, "labels": { "3": { diff --git a/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json new file mode 100644 index 
0000000000..77be5a3b77 --- /dev/null +++ b/mteb/descriptive_stats/Clustering/TwentyNewsgroupsClustering.v2.json @@ -0,0 +1,75 @@ +{ + "test": { + "num_samples": 59545, + "number_of_characters": 1907719, + "min_text_length": 11, + "average_text_length": 32.03827357460744, + "max_text_length": 120, + "min_labels_per_text": 2082, + "average_labels_per_text": 1.0, + "max_labels_per_text": 3236, + "unique_labels": 20, + "labels": { + "12": { + "count": 3137 + }, + "6": { + "count": 3070 + }, + "0": { + "count": 2613 + }, + "2": { + "count": 3155 + }, + "10": { + "count": 3220 + }, + "17": { + "count": 2986 + }, + "14": { + "count": 3106 + }, + "13": { + "count": 3055 + }, + "1": { + "count": 3056 + }, + "16": { + "count": 2911 + }, + "9": { + "count": 2984 + }, + "3": { + "count": 3070 + }, + "15": { + "count": 3090 + }, + "7": { + "count": 3036 + }, + "5": { + "count": 3124 + }, + "11": { + "count": 3236 + }, + "18": { + "count": 2483 + }, + "8": { + "count": 3090 + }, + "19": { + "count": 2082 + }, + "4": { + "count": 3041 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json index 99b033bce0..4c1f303098 100644 --- a/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json +++ b/mteb/descriptive_stats/Clustering/WikiClusteringP2P.json @@ -2,8 +2,13 @@ "test": { "num_samples": 140, "number_of_characters": 71680, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 49704, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 282, "labels": { "Nauke": { @@ -857,8 +862,13 @@ "bs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3860, + "min_labels_per_text": 6, "average_labels_per_text": 512.0, + "max_labels_per_text": 1492, "unique_labels": 17, 
"labels": { "Nauke": { @@ -917,8 +927,13 @@ "ca": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4596, + "min_labels_per_text": 20, "average_labels_per_text": 512.0, + "max_labels_per_text": 1844, "unique_labels": 8, "labels": { "Llocs": { @@ -950,8 +965,13 @@ "cs": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4782, + "min_labels_per_text": 21, "average_labels_per_text": 512.0, + "max_labels_per_text": 1559, "unique_labels": 21, "labels": { "Lid\u00c3\u00a9": { @@ -1022,8 +1042,13 @@ "da": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4725, + "min_labels_per_text": 35, "average_labels_per_text": 512.0, + "max_labels_per_text": 911, "unique_labels": 20, "labels": { "Natur": { @@ -1091,8 +1116,13 @@ "eu": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4474, + "min_labels_per_text": 110, "average_labels_per_text": 512.0, + "max_labels_per_text": 2486, "unique_labels": 5, "labels": { "Entitateak": { @@ -1115,8 +1145,13 @@ "gv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2717, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1334, "unique_labels": 28, "labels": { "Chron-oaylleeaght": { @@ -1208,8 +1243,13 @@ "ilo": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2258, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 1405, "unique_labels": 34, "labels": { "Katutubo": { @@ 
-1319,8 +1359,13 @@ "ku": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3365, + "min_labels_per_text": 5, "average_labels_per_text": 512.0, + "max_labels_per_text": 1078, "unique_labels": 39, "labels": { "Kes": { @@ -1445,8 +1490,13 @@ "lv": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 4540, + "min_labels_per_text": 13, "average_labels_per_text": 512.0, + "max_labels_per_text": 878, "unique_labels": 16, "labels": { "Kult\u00c5\u00abra": { @@ -1502,8 +1552,13 @@ "min": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3881, + "min_labels_per_text": 1, "average_labels_per_text": 512.0, + "max_labels_per_text": 3986, "unique_labels": 16, "labels": { "Makaluak_iduik": { @@ -1559,8 +1614,13 @@ "mt": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 1887, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1634, "unique_labels": 27, "labels": { "\u00c4\u00a0eografija": { @@ -1649,8 +1709,13 @@ "sco": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2605, + "min_labels_per_text": 3, "average_labels_per_text": 512.0, + "max_labels_per_text": 1081, "unique_labels": 23, "labels": { "Life": { @@ -1727,8 +1792,13 @@ "sq": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 3741, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 1109, "unique_labels": 36, "labels": { "Gjeografi": { @@ -1844,8 +1914,13 @@ 
"wa": { "num_samples": 10, "number_of_characters": 5120, + "min_text_length": 512, "average_text_length": 512.0, + "max_text_length": 512, + "unique_texts": 2317, + "min_labels_per_text": 2, "average_labels_per_text": 512.0, + "max_labels_per_text": 3653, "unique_labels": 6, "labels": { "Economeye": { diff --git a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json index 8a912bee43..897b23d7c7 100644 --- a/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json +++ b/mteb/descriptive_stats/InstructionRetrieval/Core17InstructionRetrieval.json @@ -4,11 +4,27 @@ "num_docs": 19899, "num_queries": 20, "number_of_characters": 44450333, + "min_document_length": 7, "average_document_length": 2233.0329664807277, + "max_document_length": 2959, + "unique_docs": 19143, + "min_query_length": 55, "average_query_length": 109.75, + "max_query_length": 278, + "unique_queries": 20, + "min_instruction_length": 102, "average_instruction_length": 295.55, + "max_instruction_length": 811, + "unique_instructions": 20, + "min_changed_instruction_length": 151, "average_changed_instruction_length": 355.2, + "max_changed_instruction_length": 837, + "unique_changed_instructions": 20, + "min_average_relevant_docs_per_query": 4, "average_relevant_docs_per_query": 32.7, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 55, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json index 2120a11139..d5d91adf50 100644 --- a/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json +++ b/mteb/descriptive_stats/MultilabelClassification/CEDRClassification.json @@ -1,9 +1,15 @@ { 
"test": { - "average_text_length": 91.20563230605738, + "num_samples": 1882, "number_of_characters": 171649, + "number_texts_in_train": 7, + "min_text_length": 6, + "average_text_length": 91.20563230605738, + "max_text_length": 220, + "unique_texts": 1875, + "min_labels_per_text": 0, "average_label_per_text": 0.620616365568544, - "num_samples": 1882, + "max_labels_per_text": 2, "unique_labels": 6, "labels": { "None": { @@ -25,5 +31,38 @@ "count": 125 } } + }, + "train": { + "num_samples": 7528, + "number_of_characters": 697322, + "number_texts_in_train": null, + "min_text_length": 5, + "average_text_length": 92.63044633368757, + "max_text_length": 280, + "unique_texts": 7500, + "min_labels_per_text": 0, + "average_label_per_text": 0.6101222104144527, + "max_labels_per_text": 3, + "unique_labels": 6, + "labels": { + "None": { + "count": 3043 + }, + "2": { + "count": 607 + }, + "0": { + "count": 1569 + }, + "3": { + "count": 589 + }, + "1": { + "count": 1417 + }, + "4": { + "count": 411 + } + } } } \ No newline at end of file diff --git a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json b/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json deleted file mode 100644 index 2f4f979d02..0000000000 --- a/mteb/descriptive_stats/MultilabelClassification/MultiEURLEXMultilabelClassification.json +++ /dev/null @@ -1,1732 +0,0 @@ -{ - "test": { - "average_text_length": 12014.408930434782, - "number_of_characters": 1381657027, - "average_label_per_text": 3.5938, - "num_samples": 115000, - "unique_labels": 21, - "labels": { - "18": { - "count": 50784 - }, - "15": { - "count": 30981 - }, - "5": { - "count": 24978 - }, - "6": { - "count": 45080 - }, - "3": { - "count": 63687 - }, - "17": { - "count": 37743 - }, - "1": { - "count": 15019 - }, - "20": { - "count": 14030 - }, - "0": { - "count": 17802 - }, - "2": { - "count": 22402 - }, - "19": { - "count": 10212 - }, - "9": { - "count": 3772 - }, - "4": { - 
"count": 9062 - }, - "10": { - "count": 7705 - }, - "11": { - "count": 12213 - }, - "7": { - "count": 14306 - }, - "12": { - "count": 11799 - }, - "8": { - "count": 13800 - }, - "13": { - "count": 2346 - }, - "14": { - "count": 4255 - }, - "16": { - "count": 1311 - } - }, - "hf_subset_descriptive_stats": { - "en": { - "average_text_length": 11720.2926, - "number_of_characters": 58601463, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "de": { - "average_text_length": 12865.4162, - "number_of_characters": 64327081, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fr": { - "average_text_length": 13081.1098, - 
"number_of_characters": 65405549, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "it": { - "average_text_length": 12763.4786, - "number_of_characters": 63817393, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "es": { - "average_text_length": 13080.29, - "number_of_characters": 65401450, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - 
"0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pl": { - "average_text_length": 12282.5926, - "number_of_characters": 61412963, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "ro": { - "average_text_length": 12836.9322, - "number_of_characters": 64184661, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 
- } - } - }, - "nl": { - "average_text_length": 12857.9742, - "number_of_characters": 64289871, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "el": { - "average_text_length": 12998.143, - "number_of_characters": 64990715, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hu": { - "average_text_length": 12424.641, - "number_of_characters": 62123205, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - 
"1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "pt": { - "average_text_length": 12482.4616, - "number_of_characters": 62412308, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "cs": { - "average_text_length": 10783.4676, - "number_of_characters": 53917338, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 
- }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sv": { - "average_text_length": 11612.4774, - "number_of_characters": 58062387, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "bg": { - "average_text_length": 12235.4268, - "number_of_characters": 61177134, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "da": { - "average_text_length": 11773.958, - "number_of_characters": 58869790, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - 
"3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "fi": { - "average_text_length": 12087.6862, - "number_of_characters": 60438431, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sk": { - "average_text_length": 11130.814, - "number_of_characters": 55654070, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 
513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lt": { - "average_text_length": 11245.3566, - "number_of_characters": 56226783, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "hr": { - "average_text_length": 11022.142, - "number_of_characters": 55110710, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "sl": { - "average_text_length": 10620.0594, - "number_of_characters": 53100297, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, 
- "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "et": { - "average_text_length": 10898.4312, - "number_of_characters": 54492156, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "lv": { - "average_text_length": 10938.5102, - "number_of_characters": 54692551, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - 
"count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - }, - "mt": { - "average_text_length": 12589.7442, - "number_of_characters": 62948721, - "average_label_per_text": 3.5938, - "num_samples": 5000, - "unique_labels": 21, - "labels": { - "18": { - "count": 2208 - }, - "15": { - "count": 1347 - }, - "5": { - "count": 1086 - }, - "6": { - "count": 1960 - }, - "3": { - "count": 2769 - }, - "17": { - "count": 1641 - }, - "1": { - "count": 653 - }, - "20": { - "count": 610 - }, - "0": { - "count": 774 - }, - "2": { - "count": 974 - }, - "19": { - "count": 444 - }, - "9": { - "count": 164 - }, - "4": { - "count": 394 - }, - "10": { - "count": 335 - }, - "11": { - "count": 531 - }, - "7": { - "count": 622 - }, - "12": { - "count": 513 - }, - "8": { - "count": 600 - }, - "13": { - "count": 102 - }, - "14": { - "count": 185 - }, - "16": { - "count": 57 - } - } - } - } - } -} \ No newline at end of file diff --git a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json index 63180983ca..849724bdba 100644 --- a/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json +++ b/mteb/descriptive_stats/PairClassification/PawsXPairClassification.json @@ -2,8 +2,14 @@ "test": { "num_samples": 14000, "number_of_characters": 2551922, - "avg_sentence1_len": 91.17892857142857, - "avg_sentence2_len": 91.10121428571429, + "min_sentence1_length": 2, + "avg_sentence1_length": 91.17892857142857, + "max_sentence1_length": 268, + "unique_sentence1": 13404, + "min_sentence2_length": 2, + "avg_sentence2_length": 91.10121428571429, + "max_sentence2_length": 247, + "unique_sentence2": 13462, "unique_labels": 2, "labels": { "1": { @@ -17,8 +23,14 @@ "de": { "num_samples": 2000, "number_of_characters": 478034, - "avg_sentence1_len": 119.7815, - 
"avg_sentence2_len": 119.2355, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.7815, + "max_sentence1_length": 268, + "unique_sentence1": 1934, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.2355, + "max_sentence2_length": 235, + "unique_sentence2": 1938, "unique_labels": 2, "labels": { "1": { @@ -32,8 +44,14 @@ "en": { "num_samples": 2000, "number_of_characters": 454362, - "avg_sentence1_len": 113.7575, - "avg_sentence2_len": 113.4235, + "min_sentence1_length": 25, + "avg_sentence1_length": 113.7575, + "max_sentence1_length": 209, + "unique_sentence1": 1761, + "min_sentence2_length": 25, + "avg_sentence2_length": 113.4235, + "max_sentence2_length": 209, + "unique_sentence2": 1800, "unique_labels": 2, "labels": { "1": { @@ -47,8 +65,14 @@ "es": { "num_samples": 2000, "number_of_characters": 471226, - "avg_sentence1_len": 117.815, - "avg_sentence2_len": 117.798, + "min_sentence1_length": 2, + "avg_sentence1_length": 117.815, + "max_sentence1_length": 226, + "unique_sentence1": 1955, + "min_sentence2_length": 22, + "avg_sentence2_length": 117.798, + "max_sentence2_length": 233, + "unique_sentence2": 1959, "unique_labels": 2, "labels": { "1": { @@ -62,8 +86,14 @@ "fr": { "num_samples": 2000, "number_of_characters": 480033, - "avg_sentence1_len": 120.028, - "avg_sentence2_len": 119.9885, + "min_sentence1_length": 2, + "avg_sentence1_length": 120.028, + "max_sentence1_length": 238, + "unique_sentence1": 1954, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.9885, + "max_sentence2_length": 247, + "unique_sentence2": 1953, "unique_labels": 2, "labels": { "1": { @@ -77,8 +107,14 @@ "ja": { "num_samples": 2000, "number_of_characters": 235106, - "avg_sentence1_len": 58.678, - "avg_sentence2_len": 58.875, + "min_sentence1_length": 2, + "avg_sentence1_length": 58.678, + "max_sentence1_length": 192, + "unique_sentence1": 1944, + "min_sentence2_length": 2, + "avg_sentence2_length": 58.875, + "max_sentence2_length": 198, + "unique_sentence2": 
1941, "unique_labels": 2, "labels": { "1": { @@ -92,8 +128,14 @@ "ko": { "num_samples": 2000, "number_of_characters": 260149, - "avg_sentence1_len": 64.9605, - "avg_sentence2_len": 65.114, + "min_sentence1_length": 2, + "avg_sentence1_length": 64.9605, + "max_sentence1_length": 153, + "unique_sentence1": 1954, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.114, + "max_sentence2_length": 159, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -107,8 +149,14 @@ "zh": { "num_samples": 2000, "number_of_characters": 173012, - "avg_sentence1_len": 43.232, - "avg_sentence2_len": 43.274, + "min_sentence1_length": 2, + "avg_sentence1_length": 43.232, + "max_sentence1_length": 120, + "unique_sentence1": 1909, + "min_sentence2_length": 2, + "avg_sentence2_length": 43.274, + "max_sentence2_length": 113, + "unique_sentence2": 1909, "unique_labels": 2, "labels": { "1": { @@ -124,8 +172,14 @@ "validation": { "num_samples": 14000, "number_of_characters": 2524625, - "avg_sentence1_len": 90.12585714285714, - "avg_sentence2_len": 90.2045, + "min_sentence1_length": 2, + "avg_sentence1_length": 90.12585714285714, + "max_sentence1_length": 248, + "unique_sentence1": 13357, + "min_sentence2_length": 2, + "avg_sentence2_length": 90.2045, + "max_sentence2_length": 275, + "unique_sentence2": 13397, "unique_labels": 2, "labels": { "1": { @@ -139,8 +193,14 @@ "de": { "num_samples": 2000, "number_of_characters": 467643, - "avg_sentence1_len": 116.82, - "avg_sentence2_len": 117.0015, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.82, + "max_sentence1_length": 248, + "unique_sentence1": 1914, + "min_sentence2_length": 2, + "avg_sentence2_length": 117.0015, + "max_sentence2_length": 275, + "unique_sentence2": 1920, "unique_labels": 2, "labels": { "1": { @@ -154,8 +214,14 @@ "en": { "num_samples": 2000, "number_of_characters": 451931, - "avg_sentence1_len": 113.1075, - "avg_sentence2_len": 112.858, + "min_sentence1_length": 25, + "avg_sentence1_length": 
113.1075, + "max_sentence1_length": 213, + "unique_sentence1": 1758, + "min_sentence2_length": 25, + "avg_sentence2_length": 112.858, + "max_sentence2_length": 213, + "unique_sentence2": 1771, "unique_labels": 2, "labels": { "1": { @@ -169,8 +235,14 @@ "es": { "num_samples": 2000, "number_of_characters": 466112, - "avg_sentence1_len": 116.3285, - "avg_sentence2_len": 116.7275, + "min_sentence1_length": 2, + "avg_sentence1_length": 116.3285, + "max_sentence1_length": 240, + "unique_sentence1": 1938, + "min_sentence2_length": 2, + "avg_sentence2_length": 116.7275, + "max_sentence2_length": 241, + "unique_sentence2": 1941, "unique_labels": 2, "labels": { "1": { @@ -184,8 +256,14 @@ "fr": { "num_samples": 2000, "number_of_characters": 478510, - "avg_sentence1_len": 119.5045, - "avg_sentence2_len": 119.7505, + "min_sentence1_length": 2, + "avg_sentence1_length": 119.5045, + "max_sentence1_length": 233, + "unique_sentence1": 1933, + "min_sentence2_length": 2, + "avg_sentence2_length": 119.7505, + "max_sentence2_length": 246, + "unique_sentence2": 1939, "unique_labels": 2, "labels": { "1": { @@ -199,8 +277,14 @@ "ja": { "num_samples": 2000, "number_of_characters": 229655, - "avg_sentence1_len": 57.5105, - "avg_sentence2_len": 57.317, + "min_sentence1_length": 2, + "avg_sentence1_length": 57.5105, + "max_sentence1_length": 126, + "unique_sentence1": 1957, + "min_sentence2_length": 2, + "avg_sentence2_length": 57.317, + "max_sentence2_length": 121, + "unique_sentence2": 1969, "unique_labels": 2, "labels": { "1": { @@ -214,8 +298,14 @@ "ko": { "num_samples": 2000, "number_of_characters": 261355, - "avg_sentence1_len": 65.162, - "avg_sentence2_len": 65.5155, + "min_sentence1_length": 2, + "avg_sentence1_length": 65.162, + "max_sentence1_length": 178, + "unique_sentence1": 1963, + "min_sentence2_length": 2, + "avg_sentence2_length": 65.5155, + "max_sentence2_length": 174, + "unique_sentence2": 1968, "unique_labels": 2, "labels": { "1": { @@ -229,8 +319,14 @@ "zh": { 
"num_samples": 2000, "number_of_characters": 169419, - "avg_sentence1_len": 42.448, - "avg_sentence2_len": 42.2615, + "min_sentence1_length": 2, + "avg_sentence1_length": 42.448, + "max_sentence1_length": 101, + "unique_sentence1": 1899, + "min_sentence2_length": 2, + "avg_sentence2_length": 42.2615, + "max_sentence2_length": 120, + "unique_sentence2": 1895, "unique_labels": 2, "labels": { "1": { diff --git a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json index 6ca4a56161..473a765dd9 100644 --- a/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json +++ b/mteb/descriptive_stats/PairClassification/TwitterURLCorpus.json @@ -2,8 +2,14 @@ "test": { "num_samples": 51534, "number_of_characters": 8659940, - "avg_sentence1_len": 79.48919160166103, - "avg_sentence2_len": 88.5540419916948, + "min_sentence1_length": 24, + "avg_sentence1_length": 79.48919160166103, + "max_sentence1_length": 126, + "unique_sentence1": 4329, + "min_sentence2_length": 6, + "avg_sentence2_length": 88.5540419916948, + "max_sentence2_length": 608, + "unique_sentence2": 41304, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/PairClassification/XNLI.json b/mteb/descriptive_stats/PairClassification/XNLI.json index 91ef224350..867fafdc85 100644 --- a/mteb/descriptive_stats/PairClassification/XNLI.json +++ b/mteb/descriptive_stats/PairClassification/XNLI.json @@ -2,8 +2,14 @@ "test": { "num_samples": 19110, "number_of_characters": 2907145, - "avg_sentence1_len": 103.23793825222397, - "avg_sentence2_len": 48.88895866038723, + "min_sentence1_length": 3, + "avg_sentence1_length": 103.23793825222397, + "max_sentence1_length": 401, + "unique_sentence1": 15328, + "min_sentence2_length": 2, + "avg_sentence2_length": 48.88895866038723, + "max_sentence2_length": 187, + "unique_sentence2": 19104, "unique_labels": 2, "labels": { "0": { @@ -17,8 +23,14 @@ "ar": { "num_samples": 1365, 
"number_of_characters": 179591, - "avg_sentence1_len": 89.57362637362637, - "avg_sentence2_len": 41.99487179487179, + "min_sentence1_length": 11, + "avg_sentence1_length": 89.57362637362637, + "max_sentence1_length": 242, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 41.99487179487179, + "max_sentence2_length": 115, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -32,8 +44,14 @@ "bg": { "num_samples": 1365, "number_of_characters": 220646, - "avg_sentence1_len": 110.01611721611722, - "avg_sentence2_len": 51.62930402930403, + "min_sentence1_length": 14, + "avg_sentence1_length": 110.01611721611722, + "max_sentence1_length": 303, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 51.62930402930403, + "max_sentence2_length": 150, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -47,8 +65,14 @@ "de": { "num_samples": 1365, "number_of_characters": 241224, - "avg_sentence1_len": 119.92600732600732, - "avg_sentence2_len": 56.794871794871796, + "min_sentence1_length": 3, + "avg_sentence1_length": 119.92600732600732, + "max_sentence1_length": 301, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 56.794871794871796, + "max_sentence2_length": 187, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -62,8 +86,14 @@ "el": { "num_samples": 1365, "number_of_characters": 240222, - "avg_sentence1_len": 119.05421245421246, - "avg_sentence2_len": 56.93260073260073, + "min_sentence1_length": 13, + "avg_sentence1_length": 119.05421245421246, + "max_sentence1_length": 344, + "unique_sentence1": 1095, + "min_sentence2_length": 13, + "avg_sentence2_length": 56.93260073260073, + "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -77,8 +107,14 @@ "en": { "num_samples": 1365, "number_of_characters": 212223, - "avg_sentence1_len": 105.67032967032966, - "avg_sentence2_len": 
49.8043956043956, + "min_sentence1_length": 19, + "avg_sentence1_length": 105.67032967032966, + "max_sentence1_length": 268, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 49.8043956043956, + "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -92,8 +128,14 @@ "es": { "num_samples": 1365, "number_of_characters": 232207, - "avg_sentence1_len": 115.43296703296703, - "avg_sentence2_len": 54.68205128205128, + "min_sentence1_length": 11, + "avg_sentence1_length": 115.43296703296703, + "max_sentence1_length": 385, + "unique_sentence1": 1094, + "min_sentence2_length": 8, + "avg_sentence2_length": 54.68205128205128, + "max_sentence2_length": 163, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -107,8 +149,14 @@ "fr": { "num_samples": 1365, "number_of_characters": 245259, - "avg_sentence1_len": 121.0967032967033, - "avg_sentence2_len": 58.58021978021978, + "min_sentence1_length": 9, + "avg_sentence1_length": 121.0967032967033, + "max_sentence1_length": 327, + "unique_sentence1": 1095, + "min_sentence2_length": 10, + "avg_sentence2_length": 58.58021978021978, + "max_sentence2_length": 169, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -122,8 +170,14 @@ "hi": { "num_samples": 1365, "number_of_characters": 211312, - "avg_sentence1_len": 104.63443223443224, - "avg_sentence2_len": 50.17289377289377, + "min_sentence1_length": 16, + "avg_sentence1_length": 104.63443223443224, + "max_sentence1_length": 401, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + "avg_sentence2_length": 50.17289377289377, + "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -137,8 +191,14 @@ "ru": { "num_samples": 1365, "number_of_characters": 222797, - "avg_sentence1_len": 110.76923076923077, - "avg_sentence2_len": 52.452014652014654, + "min_sentence1_length": 11, + "avg_sentence1_length": 110.76923076923077, + 
"max_sentence1_length": 306, + "unique_sentence1": 1095, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.452014652014654, + "max_sentence2_length": 167, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -152,8 +212,14 @@ "sw": { "num_samples": 1365, "number_of_characters": 210103, - "avg_sentence1_len": 104.43956043956044, - "avg_sentence2_len": 49.48205128205128, + "min_sentence1_length": 10, + "avg_sentence1_length": 104.43956043956044, + "max_sentence1_length": 266, + "unique_sentence1": 1094, + "min_sentence2_length": 2, + "avg_sentence2_length": 49.48205128205128, + "max_sentence2_length": 146, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -167,8 +233,14 @@ "th": { "num_samples": 1365, "number_of_characters": 192788, - "avg_sentence1_len": 96.6923076923077, - "avg_sentence2_len": 44.544322344322346, + "min_sentence1_length": 12, + "avg_sentence1_length": 96.6923076923077, + "max_sentence1_length": 262, + "unique_sentence1": 1095, + "min_sentence2_length": 6, + "avg_sentence2_length": 44.544322344322346, + "max_sentence2_length": 129, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -182,8 +254,14 @@ "tr": { "num_samples": 1365, "number_of_characters": 208658, - "avg_sentence1_len": 103.67765567765568, - "avg_sentence2_len": 49.18534798534799, + "min_sentence1_length": 15, + "avg_sentence1_length": 103.67765567765568, + "max_sentence1_length": 255, + "unique_sentence1": 1095, + "min_sentence2_length": 6, + "avg_sentence2_length": 49.18534798534799, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -197,8 +275,14 @@ "vi": { "num_samples": 1365, "number_of_characters": 223549, - "avg_sentence1_len": 111.31208791208792, - "avg_sentence2_len": 52.46007326007326, + "min_sentence1_length": 14, + "avg_sentence1_length": 111.31208791208792, + "max_sentence1_length": 265, + "unique_sentence1": 1095, + "min_sentence2_length": 9, + 
"avg_sentence2_length": 52.46007326007326, + "max_sentence2_length": 143, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -212,8 +296,14 @@ "zh": { "num_samples": 1365, "number_of_characters": 66566, - "avg_sentence1_len": 33.03589743589744, - "avg_sentence2_len": 15.73040293040293, + "min_sentence1_length": 4, + "avg_sentence1_length": 33.03589743589744, + "max_sentence1_length": 112, + "unique_sentence1": 1095, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.73040293040293, + "max_sentence2_length": 59, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -229,8 +319,14 @@ "validation": { "num_samples": 19110, "number_of_characters": 2909058, - "avg_sentence1_len": 103.20790162218734, - "avg_sentence2_len": 49.01909994767138, + "min_sentence1_length": 5, + "avg_sentence1_length": 103.20790162218734, + "max_sentence1_length": 323, + "unique_sentence1": 11171, + "min_sentence2_length": 3, + "avg_sentence2_length": 49.01909994767138, + "max_sentence2_length": 172, + "unique_sentence2": 19101, "unique_labels": 2, "labels": { "0": { @@ -244,8 +340,14 @@ "ar": { "num_samples": 1365, "number_of_characters": 177355, - "avg_sentence1_len": 88.31868131868131, - "avg_sentence2_len": 41.61172161172161, + "min_sentence1_length": 13, + "avg_sentence1_length": 88.31868131868131, + "max_sentence1_length": 214, + "unique_sentence1": 798, + "min_sentence2_length": 6, + "avg_sentence2_length": 41.61172161172161, + "max_sentence2_length": 137, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -259,8 +361,14 @@ "bg": { "num_samples": 1365, "number_of_characters": 219988, - "avg_sentence1_len": 109.196336996337, - "avg_sentence2_len": 51.967032967032964, + "min_sentence1_length": 16, + "avg_sentence1_length": 109.196336996337, + "max_sentence1_length": 316, + "unique_sentence1": 798, + "min_sentence2_length": 10, + "avg_sentence2_length": 51.967032967032964, + "max_sentence2_length": 151, + "unique_sentence2": 1365, 
"unique_labels": 2, "labels": { "0": { @@ -274,8 +382,14 @@ "de": { "num_samples": 1365, "number_of_characters": 241852, - "avg_sentence1_len": 119.81172161172161, - "avg_sentence2_len": 57.36923076923077, + "min_sentence1_length": 20, + "avg_sentence1_length": 119.81172161172161, + "max_sentence1_length": 298, + "unique_sentence1": 798, + "min_sentence2_length": 12, + "avg_sentence2_length": 57.36923076923077, + "max_sentence2_length": 162, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -289,8 +403,14 @@ "el": { "num_samples": 1365, "number_of_characters": 241275, - "avg_sentence1_len": 119.87545787545787, - "avg_sentence2_len": 56.88278388278388, + "min_sentence1_length": 16, + "avg_sentence1_length": 119.87545787545787, + "max_sentence1_length": 302, + "unique_sentence1": 798, + "min_sentence2_length": 6, + "avg_sentence2_length": 56.88278388278388, + "max_sentence2_length": 171, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -304,8 +424,14 @@ "en": { "num_samples": 1365, "number_of_characters": 212384, - "avg_sentence1_len": 105.71648351648352, - "avg_sentence2_len": 49.87619047619047, + "min_sentence1_length": 20, + "avg_sentence1_length": 105.71648351648352, + "max_sentence1_length": 271, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87619047619047, + "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -319,8 +445,14 @@ "es": { "num_samples": 1365, "number_of_characters": 232451, - "avg_sentence1_len": 115.17289377289377, - "avg_sentence2_len": 55.120879120879124, + "min_sentence1_length": 14, + "avg_sentence1_length": 115.17289377289377, + "max_sentence1_length": 265, + "unique_sentence1": 798, + "min_sentence2_length": 7, + "avg_sentence2_length": 55.120879120879124, + "max_sentence2_length": 148, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -334,8 +466,14 @@ "fr": { "num_samples": 1365, 
"number_of_characters": 246857, - "avg_sentence1_len": 121.75897435897436, - "avg_sentence2_len": 59.08864468864469, + "min_sentence1_length": 19, + "avg_sentence1_length": 121.75897435897436, + "max_sentence1_length": 323, + "unique_sentence1": 798, + "min_sentence2_length": 11, + "avg_sentence2_length": 59.08864468864469, + "max_sentence2_length": 172, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -349,8 +487,14 @@ "hi": { "num_samples": 1365, "number_of_characters": 212269, - "avg_sentence1_len": 105.06446886446886, - "avg_sentence2_len": 50.44395604395604, + "min_sentence1_length": 18, + "avg_sentence1_length": 105.06446886446886, + "max_sentence1_length": 277, + "unique_sentence1": 798, + "min_sentence2_length": 7, + "avg_sentence2_length": 50.44395604395604, + "max_sentence2_length": 152, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -364,8 +508,14 @@ "ru": { "num_samples": 1365, "number_of_characters": 221152, - "avg_sentence1_len": 109.74725274725274, - "avg_sentence2_len": 52.26886446886447, + "min_sentence1_length": 15, + "avg_sentence1_length": 109.74725274725274, + "max_sentence1_length": 310, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 52.26886446886447, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -379,8 +529,14 @@ "sw": { "num_samples": 1365, "number_of_characters": 210482, - "avg_sentence1_len": 104.32234432234432, - "avg_sentence2_len": 49.87692307692308, + "min_sentence1_length": 13, + "avg_sentence1_length": 104.32234432234432, + "max_sentence1_length": 264, + "unique_sentence1": 798, + "min_sentence2_length": 8, + "avg_sentence2_length": 49.87692307692308, + "max_sentence2_length": 153, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -394,8 +550,14 @@ "th": { "num_samples": 1365, "number_of_characters": 192640, - "avg_sentence1_len": 97.28498168498169, - "avg_sentence2_len": 
43.843223443223444, + "min_sentence1_length": 7, + "avg_sentence1_length": 97.28498168498169, + "max_sentence1_length": 255, + "unique_sentence1": 798, + "min_sentence2_length": 3, + "avg_sentence2_length": 43.843223443223444, + "max_sentence2_length": 140, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -409,8 +571,14 @@ "tr": { "num_samples": 1365, "number_of_characters": 208305, - "avg_sentence1_len": 102.96630036630036, - "avg_sentence2_len": 49.63809523809524, + "min_sentence1_length": 15, + "avg_sentence1_length": 102.96630036630036, + "max_sentence1_length": 269, + "unique_sentence1": 798, + "min_sentence2_length": 10, + "avg_sentence2_length": 49.63809523809524, + "max_sentence2_length": 139, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -424,8 +592,14 @@ "vi": { "num_samples": 1365, "number_of_characters": 224811, - "avg_sentence1_len": 112.26373626373626, - "avg_sentence2_len": 52.432967032967035, + "min_sentence1_length": 18, + "avg_sentence1_length": 112.26373626373626, + "max_sentence1_length": 323, + "unique_sentence1": 798, + "min_sentence2_length": 9, + "avg_sentence2_length": 52.432967032967035, + "max_sentence2_length": 159, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { @@ -439,8 +613,14 @@ "zh": { "num_samples": 1365, "number_of_characters": 67237, - "avg_sentence1_len": 33.41098901098901, - "avg_sentence2_len": 15.846886446886447, + "min_sentence1_length": 5, + "avg_sentence1_length": 33.41098901098901, + "max_sentence1_length": 135, + "unique_sentence1": 798, + "min_sentence2_length": 3, + "avg_sentence2_length": 15.846886446886447, + "max_sentence2_length": 66, + "unique_sentence2": 1365, "unique_labels": 2, "labels": { "0": { diff --git a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json index c12f4f292f..a0ced7def7 100644 --- a/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json +++ 
b/mteb/descriptive_stats/Reranking/AskUbuntuDupQuestions.json @@ -4,8 +4,17 @@ "number_of_characters": 413674, "num_positive": 2255, "num_negative": 5245, - "avg_query_len": 50.205333333333336, - "avg_positive_len": 52.54013303769401, - "avg_negative_len": 52.69189704480458 + "min_query_length": 17, + "avg_query_length": 50.205333333333336, + "max_query_length": 148, + "unique_query": 374, + "min_positive_length": 15, + "avg_positive_length": 52.54013303769401, + "max_positive_length": 152, + "unique_positive": 2165, + "min_negative_length": 15, + "avg_negative_length": 52.69189704480458, + "max_negative_length": 148, + "unique_negative": 5002 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Reranking/ESCIReranking.json b/mteb/descriptive_stats/Reranking/ESCIReranking.json index 419b228ebf..9c9556be9d 100644 --- a/mteb/descriptive_stats/Reranking/ESCIReranking.json +++ b/mteb/descriptive_stats/Reranking/ESCIReranking.json @@ -4,36 +4,72 @@ "number_of_characters": 254538331, "num_positive": 271416, "num_negative": 44235, - "avg_query_len": 19.691890046098685, - "avg_positive_len": 803.9230995961918, - "avg_negative_len": 808.501458121397, + "min_query_length": 1, + "avg_query_length": 19.691890046098685, + "max_query_length": 151, + "unique_query": 29269, + "min_positive_length": 1, + "avg_positive_length": 803.9230995961918, + "max_positive_length": 8640, + "unique_positive": 217712, + "min_negative_length": 1, + "avg_negative_length": 808.501458121397, + "max_negative_length": 4441, + "unique_negative": 39551, "hf_subset_descriptive_stats": { "us": { "num_samples": 21296, "number_of_characters": 186915609, "num_positive": 189375, "num_negative": 25463, - "avg_query_len": 21.440833959429, - "avg_positive_len": 868.3698006600661, - "avg_negative_len": 864.4493578918431 + "min_query_length": 1, + "avg_query_length": 21.440833959429, + "max_query_length": 151, + "unique_query": 21296, + "min_positive_length": 1, + "avg_positive_length": 
868.3698006600661, + "max_positive_length": 5545, + "unique_positive": 150734, + "min_negative_length": 1, + "avg_negative_length": 864.4493578918431, + "max_negative_length": 3779, + "unique_negative": 23073 }, "es": { "num_samples": 3703, "number_of_characters": 48861389, "num_positive": 39110, "num_negative": 10183, - "avg_query_len": 20.681609505806104, - "avg_positive_len": 980.9613142418818, - "avg_negative_len": 1023.2159481488756 + "min_query_length": 3, + "avg_query_length": 20.681609505806104, + "max_query_length": 59, + "unique_query": 3703, + "min_positive_length": 1, + "avg_positive_length": 980.9613142418818, + "max_positive_length": 8640, + "unique_positive": 32921, + "min_negative_length": 1, + "avg_negative_length": 1023.2159481488756, + "max_negative_length": 4441, + "unique_negative": 9285 }, "jp": { "num_samples": 4286, "number_of_characters": 18761333, "num_positive": 42931, "num_negative": 8589, - "avg_query_len": 10.146756882874476, - "avg_positive_len": 358.35792317905475, - "avg_negative_len": 388.075445337059 + "min_query_length": 1, + "avg_query_length": 10.146756882874476, + "max_query_length": 60, + "unique_query": 4286, + "min_positive_length": 1, + "avg_positive_length": 358.35792317905475, + "max_positive_length": 3488, + "unique_positive": 35165, + "min_negative_length": 1, + "avg_negative_length": 388.075445337059, + "max_negative_length": 3940, + "unique_negative": 7289 } } } diff --git a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json index 1c5fe0f039..0506ff39e5 100644 --- a/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json +++ b/mteb/descriptive_stats/Reranking/WikipediaRerankingMultilingual.json @@ -4,153 +4,306 @@ "number_of_characters": 83866932, "num_positive": 24000, "num_negative": 192000, - "avg_query_len": 59.091208333333334, - "avg_positive_len": 385.45120833333334, - "avg_negative_len": 381.23913541666667, + 
"min_query_length": 7, + "avg_query_length": 59.091208333333334, + "max_query_length": 180, + "unique_query": 23997, + "min_positive_length": 100, + "avg_positive_length": 385.45120833333334, + "max_positive_length": 3515, + "unique_positive": 23993, + "min_negative_length": 100, + "avg_negative_length": 381.23913541666667, + "max_negative_length": 9461, + "unique_negative": 191783, "hf_subset_descriptive_stats": { "bg": { "num_samples": 1500, "number_of_characters": 5145316, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 60.82666666666667, - "avg_positive_len": 375.88866666666667, - "avg_negative_len": 374.18691666666666 + "min_query_length": 18, + "avg_query_length": 60.82666666666667, + "max_query_length": 166, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 375.88866666666667, + "max_positive_length": 2241, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 374.18691666666666, + "max_negative_length": 4869, + "unique_negative": 11996 }, "bn": { "num_samples": 1500, "number_of_characters": 5390581, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 47.266666666666666, - "avg_positive_len": 394.5946666666667, - "avg_negative_len": 393.98241666666667 + "min_query_length": 7, + "avg_query_length": 47.266666666666666, + "max_query_length": 123, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 394.5946666666667, + "max_positive_length": 2338, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 393.98241666666667, + "max_negative_length": 5104, + "unique_negative": 11996 }, "cs": { "num_samples": 1500, "number_of_characters": 5079180, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.272, - "avg_positive_len": 383.8446666666667, - "avg_negative_len": 368.2504166666667 + "min_query_length": 17, + "avg_query_length": 56.272, + "max_query_length": 137, + "unique_query": 1500, + "min_positive_length": 100, 
+ "avg_positive_length": 383.8446666666667, + "max_positive_length": 2300, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 368.2504166666667, + "max_negative_length": 3487, + "unique_negative": 11982 }, "da": { "num_samples": 1500, "number_of_characters": 4746132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 56.75066666666667, - "avg_positive_len": 351.6813333333333, - "avg_negative_len": 344.457 + "min_query_length": 17, + "avg_query_length": 56.75066666666667, + "max_query_length": 137, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 351.6813333333333, + "max_positive_length": 2159, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.457, + "max_negative_length": 2563, + "unique_negative": 11972 }, "de": { "num_samples": 1500, "number_of_characters": 5483592, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.004, - "avg_positive_len": 391.5366666666667, - "avg_negative_len": 399.27341666666666 + "min_query_length": 20, + "avg_query_length": 70.004, + "max_query_length": 180, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 391.5366666666667, + "max_positive_length": 2674, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 399.27341666666666, + "max_negative_length": 3083, + "unique_negative": 12000 }, "en": { "num_samples": 1500, "number_of_characters": 6217884, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 68.372, - "avg_positive_len": 451.72866666666664, - "avg_negative_len": 453.14441666666664 + "min_query_length": 18, + "avg_query_length": 68.372, + "max_query_length": 162, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 451.72866666666664, + "max_positive_length": 3515, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 453.14441666666664, + "max_negative_length": 3662, + 
"unique_negative": 12000 }, "fa": { "num_samples": 1500, "number_of_characters": 4732619, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 48.66733333333333, - "avg_positive_len": 347.704, - "avg_negative_len": 344.8385 + "min_query_length": 12, + "avg_query_length": 48.66733333333333, + "max_query_length": 119, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 347.704, + "max_positive_length": 2571, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 344.8385, + "max_negative_length": 4707, + "unique_negative": 11978 }, "fi": { "num_samples": 1500, "number_of_characters": 5209132, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.343333333333334, - "avg_positive_len": 394.7126666666667, - "avg_negative_len": 377.83733333333333 + "min_query_length": 14, + "avg_query_length": 55.343333333333334, + "max_query_length": 132, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 394.7126666666667, + "max_positive_length": 2129, + "unique_positive": 1498, + "min_negative_length": 100, + "avg_negative_length": 377.83733333333333, + "max_negative_length": 2574, + "unique_negative": 11972 }, "hi": { "num_samples": 1500, "number_of_characters": 5620959, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 50.77733333333333, - "avg_positive_len": 420.3786666666667, - "avg_negative_len": 409.51875 + "min_query_length": 13, + "avg_query_length": 50.77733333333333, + "max_query_length": 125, + "unique_query": 1499, + "min_positive_length": 100, + "avg_positive_length": 420.3786666666667, + "max_positive_length": 2361, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 409.51875, + "max_negative_length": 5912, + "unique_negative": 11996 }, "it": { "num_samples": 1500, "number_of_characters": 5420496, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 70.05466666666666, - "avg_positive_len": 396.97333333333336, - 
"avg_negative_len": 393.3295 + "min_query_length": 23, + "avg_query_length": 70.05466666666666, + "max_query_length": 156, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 396.97333333333336, + "max_positive_length": 2082, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 393.3295, + "max_negative_length": 9461, + "unique_negative": 11993 }, "nl": { "num_samples": 1500, "number_of_characters": 5169556, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.34466666666667, - "avg_positive_len": 380.79133333333334, - "avg_negative_len": 375.02933333333334 + "min_query_length": 18, + "avg_query_length": 65.34466666666667, + "max_query_length": 136, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 380.79133333333334, + "max_positive_length": 1864, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 375.02933333333334, + "max_negative_length": 3641, + "unique_negative": 11985 }, "pt": { "num_samples": 1500, "number_of_characters": 5474356, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 65.11933333333333, - "avg_positive_len": 404.01933333333335, - "avg_negative_len": 397.554 + "min_query_length": 18, + "avg_query_length": 65.11933333333333, + "max_query_length": 176, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 404.01933333333335, + "max_positive_length": 3057, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 397.554, + "max_negative_length": 2877, + "unique_negative": 11991 }, "ro": { "num_samples": 1500, "number_of_characters": 4796113, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 61.973333333333336, - "avg_positive_len": 346.70866666666666, - "avg_negative_len": 348.5908333333333 + "min_query_length": 14, + "avg_query_length": 61.973333333333336, + "max_query_length": 169, + "unique_query": 1500, + "min_positive_length": 100, + 
"avg_positive_length": 346.70866666666666, + "max_positive_length": 1917, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 348.5908333333333, + "max_negative_length": 4213, + "unique_negative": 11971 }, "sr": { "num_samples": 1500, "number_of_characters": 5271732, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.669333333333334, - "avg_positive_len": 386.34933333333333, - "avg_negative_len": 384.0586666666667 + "min_query_length": 15, + "avg_query_length": 55.669333333333334, + "max_query_length": 146, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 386.34933333333333, + "max_positive_length": 2421, + "unique_positive": 1499, + "min_negative_length": 100, + "avg_negative_length": 384.0586666666667, + "max_negative_length": 3668, + "unique_negative": 11974 }, "no": { "num_samples": 1500, "number_of_characters": 5036586, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 55.288, - "avg_positive_len": 367.72, - "avg_negative_len": 366.8395 + "min_query_length": 14, + "avg_query_length": 55.288, + "max_query_length": 129, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 367.72, + "max_positive_length": 1450, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 366.8395, + "max_negative_length": 2841, + "unique_negative": 11996 }, "sv": { "num_samples": 1500, "number_of_characters": 5072698, "num_positive": 1500, "num_negative": 12000, - "avg_query_len": 57.73, - "avg_positive_len": 372.58733333333333, - "avg_negative_len": 368.93516666666665 + "min_query_length": 17, + "avg_query_length": 57.73, + "max_query_length": 133, + "unique_query": 1500, + "min_positive_length": 100, + "avg_positive_length": 372.58733333333333, + "max_positive_length": 2493, + "unique_positive": 1500, + "min_negative_length": 100, + "avg_negative_length": 368.93516666666665, + "max_negative_length": 3680, + "unique_negative": 11999 } } } diff 
--git a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json index 8a71a1ad1a..caaab2453b 100644 --- a/mteb/descriptive_stats/Retrieval/AppsRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/AppsRetrieval.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2245.837090504686, + "number_of_characters": 11335620, "num_samples": 12530, "num_queries": 3765, "num_documents": 8765, - "average_document_length": 0.0657169048317138, - "average_query_length": 0.4435135244766838, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 152, + "average_document_length": 717.2737022247576, + "max_document_length": 5742, + "unique_documents": 8765, + "min_query_length": 6, + "average_query_length": 1340.9604249667996, + "max_query_length": 289049, + "unique_queries": 3765, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3765 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json index fe213d96d7..78c8a7e121 100644 --- a/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/BelebeleRetrieval.json @@ -1,3396 +1,6789 @@ { "test": { - "number_of_characters": 76.49551684802204, + "number_of_characters": 25574620, "num_samples": 521866, "num_queries": 338378, "num_documents": 183488, - "average_document_length": 1.0899895361004534e-05, - "average_query_length": 0.000220154728877238, + "min_document_length": 4, + "average_document_length": 137.38034094872688, + "max_document_length": 237, + "unique_documents": 183488, + "min_query_length": 2, + "average_query_length": 1.0845149507355678, + "max_query_length": 2, + "unique_queries": 338378, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0000413738481817, + "max_relevant_docs_per_query": 2, + 
"unique_relevant_docs": 183488, "hf_subset_descriptive_stats": { "acm_Arab-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "acm_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-acm_Arab": { - "number_of_characters": 57.84, + "number_of_characters": 51232, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06204444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 102.98360655737704, + "max_document_length": 129, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + 
"unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "afr_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-afr_Latn": { - "number_of_characters": 80.04555555555555, + "number_of_characters": 71217, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08671728395061729, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 143.93647540983608, + "max_document_length": 159, + 
"unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "als_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-als_Latn": { - "number_of_characters": 78.13555555555556, + "number_of_characters": 69498, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08459506172839507, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 140.4139344262295, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "amh_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-amh_Ethi": { - "number_of_characters": 51.16111111111111, + "number_of_characters": 45221, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.05462345679012346, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 90.66598360655738, + "max_document_length": 100, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-apc_Arab": { - "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "apc_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-apc_Arab": { 
- "number_of_characters": 57.85777777777778, + "number_of_characters": 51248, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.062064197530864194, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 103.01639344262296, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 
1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ars_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 
142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ars_Arab": { - "number_of_characters": 58.43222222222222, + "number_of_characters": 51765, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06270246913580246, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 104.07581967213115, + "max_document_length": 119, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ary_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ary_Arab": { - "number_of_characters": 68.01893095768374, + "number_of_characters": 60261, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07351774048739837, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 121.48565573770492, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arz_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-arz_Arab": { - "number_of_characters": 59.14111111111111, + "number_of_characters": 52403, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06349012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 105.38319672131148, + "max_document_length": 115, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "asm_Beng-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"asm_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-asm_Beng": { - "number_of_characters": 70.26, + "number_of_characters": 62410, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07584444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 4, + "average_document_length": 125.88934426229508, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "azj_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-azj_Latn": { - "number_of_characters": 75.51222222222222, + "number_of_characters": 67137, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08168024691358025, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.57581967213116, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bam_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bam_Latn": { - "number_of_characters": 74.34222222222222, + "number_of_characters": 66084, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08038024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 133.41803278688525, + "max_document_length": 166, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + 
"average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bod_Tibt-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bod_Tibt": { - "number_of_characters": 88.90222222222222, + "number_of_characters": 79188, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09655802469135802, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.2704918032787, + "max_document_length": 213, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "bul_Cyrl-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "bul_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-bul_Cyrl": { - "number_of_characters": 74.89, + "number_of_characters": 66577, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08098888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.42827868852459, + "max_document_length": 177, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "cat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-cat_Latn": { - "number_of_characters": 77.40666666666667, + "number_of_characters": 68842, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08378518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.06967213114754, + "max_document_length": 163, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ceb_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ceb_Latn": { - "number_of_characters": 83.19666666666667, + "number_of_characters": 74053, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09021851851851853, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 15, + "average_document_length": 149.74795081967213, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ces_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ces_Latn": { - "number_of_characters": 69.73333333333333, + "number_of_characters": 61936, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.07525925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 124.91803278688525, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-ckb_Arab": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ckb_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ckb_Arab": { - "number_of_characters": 
73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 131.0266393442623, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "dan_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-dan_Latn": { - "number_of_characters": 74.96888888888888, + "number_of_characters": 66648, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08107654320987653, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 134.5737704918033, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "deu_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-deu_Latn": { - "number_of_characters": 77.32444444444444, + "number_of_characters": 68768, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08369382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 138.91803278688525, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ell_Grek-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + 
"max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ell_Grek": { - "number_of_characters": 88.92666666666666, + "number_of_characters": 79210, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09658518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 160.3155737704918, + "max_document_length": 212, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "est_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-est_Latn": { - "number_of_characters": 69.55888888888889, + "number_of_characters": 61779, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07506543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 124.59631147540983, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eus_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-eus_Latn": { - "number_of_characters": 76.44777777777777, + "number_of_characters": 67979, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08271975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 137.3012295081967, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-fin_Latn": { - 
"number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fin_Latn": { - "number_of_characters": 74.50888888888889, + "number_of_characters": 66234, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08056543209876543, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.72540983606558, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fra_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fra_Latn": { - "number_of_characters": 92.54222222222222, + "number_of_characters": 82464, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10060246913580247, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 166.98360655737704, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 107.7438524590164, + "max_document_length": 122, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "fuv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-fuv_Latn": { - "number_of_characters": 60.42111111111111, + "number_of_characters": 53555, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06491234567901234, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 107.7438524590164, + "max_document_length": 122, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "gaz_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-gaz_Latn": { - "number_of_characters": 87.93222222222222, + "number_of_characters": 78315, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.09548024691358024, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 158.48155737704917, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "grn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-grn_Latn": { - "number_of_characters": 77.10666666666667, + "number_of_characters": 
68572, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08345185185185186, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 138.51639344262296, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "guj_Gujr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "eng_Latn-guj_Gujr": { - "number_of_characters": 64.25666666666666, + "number_of_characters": 57007, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06917407407407407, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 114.81762295081967, + "max_document_length": 138, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hat_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hat_Latn": { - "number_of_characters": 72.64666666666666, + "number_of_characters": 64558, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07849629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.29098360655738, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hau_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, 
+ "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hau_Latn": { - "number_of_characters": 87.8488888888889, + "number_of_characters": 78240, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09538765432098766, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.327868852459, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "heb_Hebr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-heb_Hebr": { - "number_of_characters": 57.135555555555555, + "number_of_characters": 50598, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06126172839506173, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 101.68442622950819, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-eng_Latn": { - "number_of_characters": 
79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, 
+ "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hrv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hrv_Latn": { - "number_of_characters": 70.83555555555556, + "number_of_characters": 62928, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07648395061728396, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.95081967213115, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hun_Latn": { - "number_of_characters": 76.40555555555555, + "number_of_characters": 67941, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08267283950617284, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 137.2233606557377, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + 
"max_document_length": 193, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hye_Armn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-hye_Armn": { - "number_of_characters": 77.42555555555556, + "number_of_characters": 68859, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08380617283950619, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 139.1045081967213, + "max_document_length": 193, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ibo_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - 
"average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ibo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ibo_Latn": { - "number_of_characters": 74.51501668520578, + "number_of_characters": 66167, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08066186505584626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 19, + "average_document_length": 133.58811475409837, + "max_document_length": 156, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ilo_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + 
"number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ilo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ilo_Latn": { - "number_of_characters": 87.7611111111111, + "number_of_characters": 78161, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09529012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 158.16598360655738, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ind_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ind_Latn": { - "number_of_characters": 84.10555555555555, + "number_of_characters": 74871, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09122839506172839, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 151.42418032786884, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "isl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-isl_Latn": { - "number_of_characters": 79.27333333333333, + "number_of_characters": 70522, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08585925925925925, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 142.5122950819672, + 
"max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ita_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ita_Latn": { - "number_of_characters": 85.49777777777778, + "number_of_characters": 76124, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09277530864197532, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 153.99180327868854, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jav_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jav_Latn": { - "number_of_characters": 80.60666666666667, + "number_of_characters": 71722, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734074074074075, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 144.97131147540983, + "max_document_length": 174, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-jpn_Jpan": { - "number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "jpn_Jpan-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-jpn_Jpan": { - 
"number_of_characters": 37.79, + "number_of_characters": 33187, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039766666666666665, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 66.0061475409836, + "max_document_length": 76, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kac_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kac_Latn": { - "number_of_characters": 100.64182424916574, + "number_of_characters": 89655, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10972394243511205, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 181.71926229508196, + "max_document_length": 195, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kan_Knda-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kan_Knda": { - "number_of_characters": 74.13666666666667, + "number_of_characters": 65899, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08015185185185185, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 133.0389344262295, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kat_Geor-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kat_Geor": { - "number_of_characters": 76.81444444444445, + "number_of_characters": 68309, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312716049382717, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.97745901639345, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kaz_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kaz_Cyrl": { - "number_of_characters": 72.75666666666666, + "number_of_characters": 64657, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07861851851851852, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 130.49385245901638, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kea_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 
70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kea_Latn": { - "number_of_characters": 77.94111111111111, + "number_of_characters": 69323, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08437901234567902, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.05532786885246, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khk_Cyrl-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "khk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khk_Cyrl": { - "number_of_characters": 75.33444444444444, + "number_of_characters": 66977, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08148271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 135.24795081967213, + "max_document_length": 162, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "khm_Khmr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-khm_Khmr": { - "number_of_characters": 77.74888888888889, + "number_of_characters": 69150, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08416543209876542, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 139.70081967213116, + "max_document_length": 169, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kin_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + 
"unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kin_Latn": { - "number_of_characters": 81.89655172413794, + "number_of_characters": 72803, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08887269379770626, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 147.18647540983608, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "kir_Cyrl-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kir_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kir_Cyrl": { - "number_of_characters": 76.42333333333333, + "number_of_characters": 67957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0826925925925926, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 137.25614754098362, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "kor_Hang-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-kor_Hang": { - "number_of_characters": 37.257777777777775, + "number_of_characters": 32708, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.039175308641975305, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 65.02459016393442, + "max_document_length": 88, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"lao_Laoo-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lao_Laoo-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lao_Laoo": { - "number_of_characters": 65.31333333333333, + "number_of_characters": 57958, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07034814814814815, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 116.76639344262296, + "max_document_length": 142, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lin_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lin_Latn": { - "number_of_characters": 83.56681514476615, + "number_of_characters": 74223, "num_samples": 1386, "num_queries": 898, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09083164270018503, - "average_relevant_docs_per_query": 1.0022271714922049 + "min_document_length": 17, + "average_document_length": 150.09631147540983, + 
"max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.086859688195991, + "max_query_length": 2, + "unique_queries": 898, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0022271714922049, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lit_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lit_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lit_Latn": { - "number_of_characters": 70.69888888888889, + "number_of_characters": 62805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 
0.0763320987654321, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 126.69877049180327, + "max_document_length": 167, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lug_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "lug_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lug_Latn": { - "number_of_characters": 80.52057842046719, + "number_of_characters": 
71566, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08734213394935171, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 12, + "average_document_length": 144.6516393442623, + "max_document_length": 237, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "luo_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "luo_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-luo_Latn": { - "number_of_characters": 75.14333333333333, + "number_of_characters": 66805, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08127037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 134.8954918032787, + "max_document_length": 178, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "lvs_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-lvs_Latn": { - "number_of_characters": 71.97888888888889, + "number_of_characters": 63957, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07775432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 129.0594262295082, + "max_document_length": 172, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mal_Mlym-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + 
"max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mal_Mlym": { - "number_of_characters": 82.69222222222223, + "number_of_characters": 73599, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08965802469135803, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.81762295081967, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mar_Deva-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mar_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mar_Deva": { - "number_of_characters": 70.62625139043382, + "number_of_characters": 62671, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07633620844319669, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 15, + "average_document_length": 126.42418032786885, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "mkd_Cyrl-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mkd_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + 
"number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mkd_Cyrl": { - "number_of_characters": 76.01333333333334, + "number_of_characters": 67588, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08223703703703704, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 136.5, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mlt_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, 
+ "unique_relevant_docs": 488 }, "mlt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mlt_Latn": { - "number_of_characters": 77.00444444444445, + "number_of_characters": 68480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08333827160493827, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 138.327868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, 
+ "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mri_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mri_Latn": { - "number_of_characters": 83.71444444444444, + "number_of_characters": 74519, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09079382716049382, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 150.702868852459, + "max_document_length": 185, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "mya_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-mya_Mymr": { - "number_of_characters": 91.28333333333333, + "number_of_characters": 81331, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0992037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 164.66188524590163, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nld_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nld_Latn": { - "number_of_characters": 77.34777777777778, + "number_of_characters": 68789, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08371975308641975, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 138.9610655737705, + "max_document_length": 183, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nob_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nob_Latn": { - "number_of_characters": 73.04555555555555, + "number_of_characters": 64917, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0789395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 131.0266393442623, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Deva": { - "number_of_characters": 
68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + 
"max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + 
"max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + "max_document_length": 235, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nso_Latn": { - "number_of_characters": 88.77444444444444, + "number_of_characters": 79073, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09641604938271604, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 160.03483606557376, + 
"max_document_length": 235, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "nya_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-nya_Latn": { - "number_of_characters": 92.78777777777778, + "number_of_characters": 82685, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1008753086419753, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.43647540983608, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ory_Orya-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ory_Orya": { - "number_of_characters": 74.95777777777778, + "number_of_characters": 66638, "num_samples": 1388, "num_queries": 900, 
"num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0810641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 10, + "average_document_length": 134.55327868852459, + "max_document_length": 168, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-pan_Guru": { - "number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pan_Guru-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pan_Guru": { - 
"number_of_characters": 75.29777777777778, + "number_of_characters": 66944, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08144197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 135.18032786885246, + "max_document_length": 157, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pbt_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pbt_Arab": { - "number_of_characters": 69.67111111111112, + "number_of_characters": 61880, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07519012345679013, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 124.80327868852459, + "max_document_length": 155, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pes_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pes_Arab": { - "number_of_characters": 66.75111111111111, + "number_of_characters": 59252, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07194567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 119.41803278688525, + "max_document_length": 152, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "plt_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-plt_Latn": { - "number_of_characters": 96.99555555555555, + "number_of_characters": 86472, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10555061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 175.19672131147541, + "max_document_length": 222, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "pol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-pol_Latn": { - "number_of_characters": 76.09777777777778, + "number_of_characters": 67664, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08233086419753087, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 136.65573770491804, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "por_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 
70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-por_Latn": { - "number_of_characters": 80.11666666666666, + "number_of_characters": 71281, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08679629629629629, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.06762295081967, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ron_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "ron_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ron_Latn": { - "number_of_characters": 80.74222222222222, + "number_of_characters": 71844, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08749135802469137, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 145.22131147540983, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "rus_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-rus_Cyrl": { - "number_of_characters": 85.16333333333333, + "number_of_characters": 75823, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0924037037037037, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 153.375, + "max_document_length": 196, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "shn_Mymr-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-shn_Mymr": { - "number_of_characters": 77.90222222222222, + "number_of_characters": 69288, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0843358024691358, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 139.98360655737704, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-slk_Latn": { - 
"number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slk_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slk_Latn": { - "number_of_characters": 70.5411111111111, + "number_of_characters": 62663, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07615679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 126.4077868852459, + "max_document_length": 146, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "slv_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-slv_Latn": { - "number_of_characters": 70.79888888888888, + "number_of_characters": 62895, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0764432098765432, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 126.88319672131148, + "max_document_length": 176, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.78483606557376, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sna_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sna_Latn": { - "number_of_characters": 83.30700778642937, + "number_of_characters": 74071, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09044161044096703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + 
"average_document_length": 149.78483606557376, + "max_document_length": 191, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "snd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-snd_Arab": { - "number_of_characters": 65.42333333333333, + "number_of_characters": 58057, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.07047037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 116.96926229508196, + "max_document_length": 164, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "som_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-som_Latn": { - "number_of_characters": 92.95777777777778, + "number_of_characters": 82838, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.1010641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 167.75, + "max_document_length": 201, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sot_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 
}, "eng_Latn-sot_Latn": { - "number_of_characters": 85.13111111111111, + "number_of_characters": 75794, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0923679012345679, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 153.3155737704918, + "max_document_length": 186, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "spa_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-spa_Latn": { - "number_of_characters": 84.16, + "number_of_characters": 74920, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09128888888888889, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 151.52459016393442, + "max_document_length": 180, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "srp_Cyrl-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "srp_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 
2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-srp_Cyrl": { - "number_of_characters": 69.49833147942158, + "number_of_characters": 61657, "num_samples": 1387, "num_queries": 899, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07508157005497394, - "average_relevant_docs_per_query": 1.0011123470522802 + "min_document_length": 13, + "average_document_length": 124.34631147540983, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.085650723025584, + "max_query_length": 2, + "unique_queries": 899, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0011123470522802, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 488 }, "ssw_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ssw_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-ssw_Latn": { - "number_of_characters": 83.09777777777778, + "number_of_characters": 73964, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09010864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 149.5655737704918, + "max_document_length": 182, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sun_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 
0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-sun_Latn": { - "number_of_characters": 80.16, + "number_of_characters": 71320, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08684444444444445, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 144.14754098360655, + "max_document_length": 173, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swe_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + 
"number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swe_Latn": { - "number_of_characters": 70.67666666666666, + "number_of_characters": 62785, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07630740740740741, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 126.6577868852459, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "swh_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 
1, + "unique_relevant_docs": 488 }, "swh_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-swh_Latn": { - "number_of_characters": 82.56, + "number_of_characters": 73480, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08951111111111111, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 148.5737704918033, + "max_document_length": 194, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tam_Taml-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tam_Taml": { - "number_of_characters": 83.12777777777778, + "number_of_characters": 73991, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09014197530864197, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 149.62090163934425, + "max_document_length": 181, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tel_Telu-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tel_Telu": { - "number_of_characters": 74.18777777777778, + "number_of_characters": 65945, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08020864197530865, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 133.13319672131146, + "max_document_length": 149, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgk_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgk_Cyrl": { - "number_of_characters": 76.28111111111112, + "number_of_characters": 67829, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08253456790123458, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 136.99385245901638, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - 
"average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tgl_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tgl_Latn": { - "number_of_characters": 84.34555555555555, + "number_of_characters": 75087, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09149506172839506, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 151.86680327868854, + "max_document_length": 184, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-tha_Thai": { - 
"number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tha_Thai-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tha_Thai": { - "number_of_characters": 61.46666666666667, + "number_of_characters": 54496, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06607407407407408, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 109.67213114754098, + "max_document_length": 123, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + 
"average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tir_Ethi-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tir_Ethi": { - "number_of_characters": 53.99888888888889, + "number_of_characters": 47775, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.057776543209876546, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 95.89959016393442, + "max_document_length": 110, + "unique_documents": 488, + "min_query_length": 2, + 
"average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 160.68647540983608, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tsn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tsn_Latn": { - "number_of_characters": 89.12777777777778, + "number_of_characters": 79391, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.09680864197530864, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + 
"average_document_length": 160.68647540983608, + "max_document_length": 204, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tso_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tso_Latn": { - "number_of_characters": 93.69444444444444, + "number_of_characters": 83501, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, 
- "average_query_length": 0.10188271604938272, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 169.10860655737704, + "max_document_length": 215, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "tur_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-tur_Latn": { - "number_of_characters": 73.56222222222222, + "number_of_characters": 
65382, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07951358024691357, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 131.9795081967213, + "max_document_length": 158, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ukr_Cyrl-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "eng_Latn-ukr_Cyrl": { - "number_of_characters": 74.08222222222223, + "number_of_characters": 65850, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08009135802469136, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 132.93852459016392, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, + "number_of_characters": 64450, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "uzn_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-uzn_Latn": { - "number_of_characters": 79.61333333333333, + "number_of_characters": 70828, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08623703703703703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 143.13934426229508, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "vie_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-vie_Latn": { - "number_of_characters": 75.05333333333333, + "number_of_characters": 66724, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08117037037037036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 134.7295081967213, + "max_document_length": 161, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "war_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, 
"num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-war_Latn": { - "number_of_characters": 88.07555555555555, + "number_of_characters": 78444, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0956395061728395, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 158.74590163934425, + "max_document_length": 207, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "wol_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 488 }, "wol_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-wol_Latn": { - "number_of_characters": 72.60555555555555, + "number_of_characters": 64521, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07845061728395061, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 130.21516393442624, + "max_document_length": 139, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 
900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "xho_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-xho_Latn": { - "number_of_characters": 80.50333333333333, + "number_of_characters": 71629, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08722592592592593, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 144.78073770491804, + "max_document_length": 179, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "yor_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-yor_Latn": { - "number_of_characters": 70.64, + "number_of_characters": 62752, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07626666666666666, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 11, + "average_document_length": 126.59016393442623, + "max_document_length": 143, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, 
+ "average_document_length": 40.10860655737705, + "max_document_length": 64, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hans-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hans": { - "number_of_characters": 23.747777777777777, + "number_of_characters": 20549, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.024164197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 40.10860655737705, + "max_document_length": 64, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - 
"average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zho_Hant-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zho_Hant": { - "number_of_characters": 23.07888888888889, + "number_of_characters": 19947, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.02342098765432099, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 7, + "average_document_length": 38.875, + "max_document_length": 45, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, 
"num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zsm_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zsm_Latn": { - "number_of_characters": 80.92444444444445, + "number_of_characters": 72008, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08769382716049383, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + "average_document_length": 145.55737704918033, + "max_document_length": 210, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, 
"zul_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "zul_Latn-eng_Latn": { - "number_of_characters": 79.34777777777778, + "number_of_characters": 70589, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08594197530864198, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 142.64959016393442, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "eng_Latn-zul_Latn": { - "number_of_characters": 78.0411111111111, + "number_of_characters": 69413, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08449012345679012, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 140.23975409836066, + "max_document_length": 171, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + 
"min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Arab-arb_Latn": { - "number_of_characters": 69.02444444444444, + "number_of_characters": 61298, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.0744716049382716, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 12, + "average_document_length": 123.61065573770492, + "max_document_length": 160, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "arb_Latn-arb_Arab": { - "number_of_characters": 60.55, + "number_of_characters": 53671, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.06505555555555555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 107.98155737704919, + "max_document_length": 134, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Beng-ben_Latn": { - "number_of_characters": 76.78777777777778, + "number_of_characters": 68285, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08309753086419754, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 137.92827868852459, + "max_document_length": 185, + "unique_documents": 488, + 
"min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "ben_Latn-ben_Beng": { - "number_of_characters": 71.48444444444445, + "number_of_characters": 63512, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07720493827160495, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 9, + "average_document_length": 128.14754098360655, + "max_document_length": 175, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Deva-hin_Latn": { - "number_of_characters": 76.81222222222222, + "number_of_characters": 68307, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08312469135802468, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 137.9733606557377, + "max_document_length": 170, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "hin_Latn-hin_Deva": { - "number_of_characters": 74.61777777777777, + "number_of_characters": 66332, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.08068641975308642, - "average_relevant_docs_per_query": 1.0 + 
"min_document_length": 14, + "average_document_length": 133.9262295081967, + "max_document_length": 165, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Deva-npi_Latn": { - "number_of_characters": 73.89666666666666, + "number_of_characters": 65683, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07988518518518518, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 132.59631147540983, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "npi_Latn-npi_Deva": { - "number_of_characters": 68.89666666666666, + "number_of_characters": 61183, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07432962962962962, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 123.375, + "max_document_length": 154, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Sinh-sin_Latn": { - "number_of_characters": 96.46666666666667, + "number_of_characters": 85996, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 
0.004098360655737705, - "average_query_length": 0.10496296296296297, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 174.22131147540983, + "max_document_length": 224, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "sin_Latn-sin_Sinh": { - "number_of_characters": 71.91777777777777, + "number_of_characters": 63902, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07768641975308642, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 17, + "average_document_length": 128.94672131147541, + "max_document_length": 159, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Arab-urd_Latn": { - "number_of_characters": 92.07, + "number_of_characters": 82039, "num_samples": 1388, "num_queries": 900, "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.10007777777777777, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 15, + "average_document_length": 166.1127049180328, + "max_document_length": 230, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 }, "urd_Latn-urd_Arab": { - "number_of_characters": 72.52666666666667, - "num_samples": 
1388, - "num_queries": 900, - "num_documents": 488, - "average_document_length": 0.004098360655737705, - "average_query_length": 0.07836296296296297, - "average_relevant_docs_per_query": 1.0 + "number_of_characters": 64450, + "num_samples": 1388, + "num_queries": 900, + "num_documents": 488, + "min_document_length": 11, + "average_document_length": 130.06967213114754, + "max_document_length": 187, + "unique_documents": 488, + "min_query_length": 2, + "average_query_length": 1.0844444444444445, + "max_query_length": 2, + "unique_queries": 900, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 488 } } } diff --git a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json index e9ff1f7874..3d27f624b9 100644 --- a/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/COIRCodeSearchNetRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 664.7686497593272, + "number_of_characters": 36843313, "num_samples": 1056326, "num_queries": 52561, "num_documents": 1003765, - "average_document_length": 1.9924982441109223e-06, - "average_query_length": 0.012609513703303347, + "min_document_length": 54, + "average_document_length": 34.70511822986456, + "max_document_length": 334374, + "unique_documents": 1003765, + "min_query_length": 2, + "average_query_length": 38.19428854093339, + "max_query_length": 2, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 941.4041426464673, + "number_of_characters": 14574651, "num_samples": 295228, "num_queries": 14918, "num_documents": 280310, - "average_document_length": 7.134957725375477e-06, - "average_query_length": 
0.0629711853228628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 95, + "average_document_length": 49.994759373550714, + "max_document_length": 14008, + "unique_documents": 280310, + "min_query_length": 2, + "average_query_length": 37.5801045716584, + "max_query_length": 2, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { - "number_of_characters": 748.8343968398663, + "number_of_characters": 2587540, "num_samples": 68145, "num_queries": 3291, "num_documents": 64854, - "average_document_length": 3.0838498781879296e-05, - "average_query_length": 0.2269323600242681, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 87, + "average_document_length": 37.89792456903198, + "max_document_length": 334374, + "unique_documents": 64854, + "min_query_length": 2, + "average_query_length": 39.412944393801276, + "max_query_length": 2, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { - "number_of_characters": 405.3770007387343, + "number_of_characters": 3641108, "num_samples": 190562, "num_queries": 8122, "num_documents": 182440, - "average_document_length": 1.0962508221881167e-05, - "average_query_length": 0.049664737840277556, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 54, + "average_document_length": 17.957838193378645, + "max_document_length": 5280, + "unique_documents": 182440, + "min_query_length": 2, + "average_query_length": 44.9248953459739, + "max_query_length": 2, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { - "number_of_characters": 457.43695479777955, + "number_of_characters": 629446, "num_samples": 
28831, "num_queries": 1261, "num_documents": 27570, - "average_document_length": 7.254261878853827e-05, - "average_query_length": 0.3611712567785722, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 83, + "average_document_length": 20.830830612985128, + "max_document_length": 3992, + "unique_documents": 27570, + "min_query_length": 2, + "average_query_length": 43.72720063441713, + "max_query_length": 2, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { - "number_of_characters": 588.8922866271109, + "number_of_characters": 6791137, "num_samples": 191821, "num_queries": 10955, "num_documents": 180866, - "average_document_length": 1.1057910276116019e-05, - "average_query_length": 0.053573006538303145, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 77, + "average_document_length": 35.54789180940586, + "max_document_length": 7615, + "unique_documents": 180866, + "min_query_length": 2, + "average_query_length": 33.019808306709265, + "max_query_length": 2, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 10955 }, "php": { - "number_of_characters": 578.8503639217925, + "number_of_characters": 8619431, "num_samples": 281739, "num_queries": 14014, "num_documents": 267725, - "average_document_length": 7.470352040339901e-06, - "average_query_length": 0.041162434987997175, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 94, + "average_document_length": 30.195091978709495, + "max_document_length": 4904, + "unique_documents": 267725, + "min_query_length": 2, + "average_query_length": 38.20822035107749, + "max_query_length": 2, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json index a0325c6385..6d73096d42 100644 --- a/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeEditSearchRetrieval.json @@ -1,129 +1,255 @@ { "train": { - "number_of_characters": 71.98776923076923, + "number_of_characters": 935841, "num_samples": 26000, "num_queries": 13000, "num_documents": 13000, - "average_document_length": 7.692307692307693e-05, - "average_query_length": 0.005460597633136095, + "min_document_length": 18, + "average_document_length": 70.98776923076923, + "max_document_length": 2532, + "unique_documents": 13000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 13000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13000, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 70.519, + "number_of_characters": 70519, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.06951900000000001, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 69.519, + "max_document_length": 1811, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { - "number_of_characters": 57.88, + "number_of_characters": 57880, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.05688, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + 
"average_document_length": 56.88, + "max_document_length": 601, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "typescript": { - "number_of_characters": 61.092, + "number_of_characters": 61092, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.060092, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 60.092, + "max_document_length": 659, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { - "number_of_characters": 71.797, + "number_of_characters": 71797, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.070797, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 70.797, + "max_document_length": 1529, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { - "number_of_characters": 67.9, + "number_of_characters": 67900, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.0669, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 66.9, + "max_document_length": 751, + "unique_documents": 1000, + 
"min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { - "number_of_characters": 63.984, + "number_of_characters": 63984, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.062984, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 62.984, + "max_document_length": 807, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { - "number_of_characters": 62.927, + "number_of_characters": 62927, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.061927, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 21, + "average_document_length": 61.927, + "max_document_length": 766, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c": { - "number_of_characters": 98.588, + "number_of_characters": 98588, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.097588, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 20, + "average_document_length": 97.588, + "max_document_length": 1672, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 
1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "c++": { - "number_of_characters": 115.48, + "number_of_characters": 115480, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.11448, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 114.48, + "max_document_length": 1856, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "rust": { - "number_of_characters": 68.503, + "number_of_characters": 68503, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.067503, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 67.503, + "max_document_length": 2532, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "swift": { - "number_of_characters": 58.279, + "number_of_characters": 58279, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.057279000000000004, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 19, + "average_document_length": 57.279, + "max_document_length": 727, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, 
+ "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "scala": { - "number_of_characters": 65.833, + "number_of_characters": 65833, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.064833, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 22, + "average_document_length": 64.833, + "max_document_length": 685, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "shell": { - "number_of_characters": 73.059, + "number_of_characters": 73059, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.072059, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 72.059, + "max_document_length": 813, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json index c498011923..1be18319cd 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackMT.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 5894.401984777533, + "number_of_characters": 156266302, "num_samples": 79660, "num_queries": 13277, "num_documents": 66383, - "average_document_length": 0.022127347788495202, - "average_query_length": 0.3333224566192555, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 127, + "average_document_length": 
885.131117906693, + "max_document_length": 32432, + "unique_documents": 66383, + "min_query_length": 2, + "average_query_length": 7344.177374406869, + "max_query_length": 9403, + "unique_queries": 13277, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 13277 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json index a7e6531492..4511605dd5 100644 --- a/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json +++ b/mteb/descriptive_stats/Retrieval/CodeFeedbackST.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2246.575885305433, + "number_of_characters": 260957682, "num_samples": 187832, "num_queries": 31306, "num_documents": 156526, - "average_document_length": 0.009725743421916316, - "average_query_length": 0.02313435668710662, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 26, + "average_document_length": 144.85253568097312, + "max_document_length": 13851, + "unique_documents": 156526, + "min_query_length": 1, + "average_query_length": 7611.464064396601, + "max_query_length": 11354, + "unique_queries": 31306, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 31306 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json index 96802f81df..a817119b43 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetCCRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 390.06276516809044, + "number_of_characters": 22407915, "num_samples": 1058035, "num_queries": 52561, "num_documents": 1005474, - "average_document_length": 1.9891116030847143e-06, - 
"average_query_length": 0.007383093266263778, + "min_document_length": 23, + "average_document_length": 20.28592186371801, + "max_document_length": 214210, + "unique_documents": 1005474, + "min_query_length": 2, + "average_query_length": 38.259317745096176, + "max_query_length": 2, + "unique_queries": 52561, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 52561, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 553.7934039415471, + "number_of_characters": 8792958, "num_samples": 295570, "num_queries": 14918, "num_documents": 280652, - "average_document_length": 7.126263130139817e-06, - "average_query_length": 0.0369884303486759, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 29.330466200133973, + "max_document_length": 8326, + "unique_documents": 280652, + "min_query_length": 2, + "average_query_length": 37.62595522187961, + "max_query_length": 2, + "unique_queries": 14918, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14918 }, "javascript": { - "number_of_characters": 445.70707991491946, + "number_of_characters": 1590642, "num_samples": 68492, "num_queries": 3291, "num_documents": 65201, - "average_document_length": 3.0674376159874846e-05, - "average_query_length": 0.1348243937754237, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 22.395975521847824, + "max_document_length": 214210, + "unique_documents": 65201, + "min_query_length": 2, + "average_query_length": 39.6238225463385, + "max_query_length": 2, + "unique_queries": 3291, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 3291 }, "go": { - "number_of_characters": 235.76803742920464, + "number_of_characters": 2264134, 
"num_samples": 190857, "num_queries": 8122, "num_documents": 182735, - "average_document_length": 1.0944810791583441e-05, - "average_query_length": 0.028782077989313547, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 23, + "average_document_length": 10.390259118395491, + "max_document_length": 3589, + "unique_documents": 182735, + "min_query_length": 2, + "average_query_length": 44.99753755232701, + "max_query_length": 2, + "unique_queries": 8122, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 8122 }, "ruby": { - "number_of_characters": 268.8731165741475, + "number_of_characters": 391703, "num_samples": 28849, "num_queries": 1261, "num_documents": 27588, - "average_document_length": 7.24952878062926e-05, - "average_query_length": 0.21163609561788066, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 36, + "average_document_length": 12.198310859794113, + "max_document_length": 2244, + "unique_documents": 27588, + "min_query_length": 2, + "average_query_length": 43.75574940523394, + "max_query_length": 2, + "unique_queries": 1261, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1261 }, "java": { - "number_of_characters": 344.5341853035144, + "number_of_characters": 4114584, "num_samples": 192016, "num_queries": 10955, "num_documents": 181061, - "average_document_length": 1.1046001071462105e-05, - "average_query_length": 0.03126738341428703, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 38, + "average_document_length": 20.724849636310413, + "max_document_length": 5066, + "unique_documents": 181061, + "min_query_length": 2, + "average_query_length": 33.055408489274306, + "max_query_length": 2, + "unique_queries": 10955, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + 
"unique_relevant_docs": 10955 }, "php": { - "number_of_characters": 338.62194947909234, + "number_of_characters": 5253894, "num_samples": 282251, "num_queries": 14014, "num_documents": 268237, - "average_document_length": 7.456092932742314e-06, - "average_query_length": 0.024020404558234076, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 40, + "average_document_length": 17.586760961388624, + "max_document_length": 2995, + "unique_documents": 268237, + "min_query_length": 2, + "average_query_length": 38.28129013843299, + "max_query_length": 2, + "unique_queries": 14014, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 14014 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json index 188d4eb7ec..853c4c79c6 100644 --- a/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/CodeSearchNetRetrieval.json @@ -1,66 +1,129 @@ { "test": { - "number_of_characters": 325.01233333333334, + "number_of_characters": 1950074, "num_samples": 12000, "num_queries": 6000, "num_documents": 6000, - "average_document_length": 0.00016666666666666666, - "average_query_length": 0.05400205555555556, + "min_document_length": 2, + "average_document_length": 324.01233333333334, + "max_document_length": 17533, + "unique_documents": 6000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 6000, + "min_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 6000, "hf_subset_descriptive_stats": { "python": { - "number_of_characters": 467.546, + "number_of_characters": 467546, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.466546, - 
"average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 466.546, + "max_document_length": 8636, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "javascript": { - "number_of_characters": 187.018, + "number_of_characters": 187018, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.186018, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 186.018, + "max_document_length": 7657, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "go": { - "number_of_characters": 126.213, + "number_of_characters": 126213, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.125213, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 14, + "average_document_length": 125.213, + "max_document_length": 1501, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "ruby": { - "number_of_characters": 314.818, + "number_of_characters": 314818, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.313818, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + 
"average_document_length": 313.818, + "max_document_length": 17533, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "java": { - "number_of_characters": 691.36, + "number_of_characters": 691360, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.69036, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 2, + "average_document_length": 690.36, + "max_document_length": 6473, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 }, "php": { - "number_of_characters": 163.119, + "number_of_characters": 163119, "num_samples": 2000, "num_queries": 1000, "num_documents": 1000, - "average_document_length": 0.001, - "average_query_length": 0.162119, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 5, + "average_document_length": 162.119, + "max_document_length": 1240, + "unique_documents": 1000, + "min_query_length": 1, + "average_query_length": 1.0, + "max_query_length": 1, + "unique_queries": 1000, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json index da6aa81741..07081e69c3 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanContest.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 
2520.6537967751206, + "number_of_characters": 1744286, "num_samples": 1229, "num_queries": 221, "num_documents": 1008, - "average_document_length": 1.4965681295666415, - "average_query_length": 4.57969738539342, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 8, + "average_document_length": 221.90178571428572, + "max_document_length": 4147, + "unique_documents": 1008, + "min_query_length": 8, + "average_query_length": 6880.58371040724, + "max_query_length": 10852, + "unique_queries": 221, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 221 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json index cf266671f9..042658caad 100644 --- a/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json +++ b/mteb/descriptive_stats/Retrieval/CodeTransOceanDL.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 3347.695751633987, + "number_of_characters": 1543912, "num_samples": 996, "num_queries": 180, "num_documents": 816, - "average_document_length": 1.8138155997693195, - "average_query_length": 10.37567901234568, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 376, + "average_document_length": 411.97549019607845, + "max_document_length": 8285, + "unique_documents": 816, + "min_query_length": 58, + "average_query_length": 6709.666666666667, + "max_query_length": 8469, + "unique_queries": 180, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 180 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/CosQA.json b/mteb/descriptive_stats/Retrieval/CosQA.json index 5dd3a9637d..d8f17d4b21 100644 --- a/mteb/descriptive_stats/Retrieval/CosQA.json +++ b/mteb/descriptive_stats/Retrieval/CosQA.json @@ -1,11 +1,20 @@ { "test": { - 
"number_of_characters": 313.946741215298, + "number_of_characters": 5728450, "num_samples": 21104, "num_queries": 500, "num_documents": 20604, - "average_document_length": 0.013450433955314403, - "average_query_length": 0.073628, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 18, + "average_document_length": 0.8933702193748787, + "max_document_length": 83, + "unique_documents": 20604, + "min_query_length": 88, + "average_query_length": 11420.086, + "max_query_length": 6396, + "unique_queries": 500, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 500 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json index 6498bb736c..4598b2af77 100644 --- a/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/JaqketRetrieval.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 3799.701347237398, + "number_of_characters": 428294530, "num_samples": 115226, "num_queries": 997, "num_documents": 114229, - "average_document_length": 0.03281999517532617, - "average_query_length": 0.050858694438380335, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 16, + "average_document_length": 0.4425671239352529, + "max_document_length": 98, + "unique_documents": 114229, + "min_query_length": 8, + "average_query_length": 429532.5737211635, + "max_query_length": 188424, + "unique_queries": 997, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 989 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/NFCorpus.json b/mteb/descriptive_stats/Retrieval/NFCorpus.json new file mode 100644 index 0000000000..94df0b0cfb --- /dev/null +++ b/mteb/descriptive_stats/Retrieval/NFCorpus.json @@ -0,0 +1,11 @@ +{ + "test": { + 
"number_of_characters": 1612.5486310130989, + "num_samples": 3956, + "num_queries": 323, + "num_documents": 3633, + "average_document_length": 0.43787060972495073, + "average_query_length": 0.06738299034784193, + "average_relevant_docs_per_query": 38.18575851393189 + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json index d0949feac1..51972461e6 100644 --- a/mteb/descriptive_stats/Retrieval/StackOverflowQA.json +++ b/mteb/descriptive_stats/Retrieval/StackOverflowQA.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2506.1079405241967, + "number_of_characters": 26584028, "num_samples": 21925, "num_queries": 1994, "num_documents": 19931, - "average_document_length": 0.060382397340162784, - "average_query_length": 0.6532730085944896, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 61, + "average_document_length": 130.32145903366614, + "max_document_length": 22234, + "unique_documents": 19931, + "min_query_length": 5, + "average_query_length": 12029.38365095286, + "max_query_length": 46028, + "unique_queries": 1994, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 1994 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json index c833692b92..56c3964a58 100644 --- a/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json +++ b/mteb/descriptive_stats/Retrieval/SyntheticText2SQL.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 210.9770886090626, + "number_of_characters": 14041553, "num_samples": 111702, "num_queries": 5851, "num_documents": 105851, - "average_document_length": 0.0012099201759594499, - "average_query_length": 0.014169514281931103, - "average_relevant_docs_per_query": 1.0 + "min_document_length": 13, + 
"average_document_length": 4.582686984534865, + "max_document_length": 281, + "unique_documents": 105851, + "min_query_length": 17, + "average_query_length": 2316.9494103572038, + "max_query_length": 762, + "unique_queries": 5851, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 1, + "unique_relevant_docs": 5851 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020.json b/mteb/descriptive_stats/Retrieval/Touche2020.json index 76798710b0..a3c37a54ee 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 1763.7633372731125, + "number_of_characters": 658107591, "num_samples": 382594, "num_queries": 49, "num_documents": 382545, - "average_document_length": 0.00449707816294695, - "average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 19.020408163265305 + "min_document_length": 16, + "average_document_length": 0.0055627442523101854, + "max_document_length": 83, + "unique_documents": 382545, + "min_query_length": 3, + "average_query_length": 13430723.734693877, + "max_query_length": 106072, + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 45.183673469387756, + "max_relevant_docs_per_query": 52, + "unique_relevant_docs": 2099 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json index 3d04c572c6..1b436abd75 100644 --- a/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json +++ b/mteb/descriptive_stats/Retrieval/Touche2020Retrieval.v3.json @@ -1,11 +1,20 @@ { "test": { - "number_of_characters": 2140.8203839475027, + "number_of_characters": 637047138, "num_samples": 303781, "num_queries": 49, "num_documents": 303732, - "average_document_length": 0.006905402830518125, - 
"average_query_length": 0.8862973760932945, - "average_relevant_docs_per_query": 34.93877551020408 + "min_document_length": 16, + "average_document_length": 0.007006176497701922, + "max_document_length": 83, + "unique_documents": 303732, + "min_query_length": 41, + "average_query_length": 13000918.57142857, + "max_query_length": 105983, + "unique_queries": 49, + "min_relevant_docs_per_query": 40, + "average_relevant_docs_per_query": 58.142857142857146, + "max_relevant_docs_per_query": 87, + "unique_relevant_docs": 2732 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json index 6a48e9b08e..f23a5ea1be 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRCrossLingualInstructionRetrieval.json @@ -4,48 +4,112 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283654099, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "unique_docs": 121635, + "min_query_length": 32, "average_query_length": 81.8780487804878, + "max_query_length": 173, + "unique_queries": 75, + "min_instruction_length": 93, "average_instruction_length": 389.9512195121951, + "max_instruction_length": 887, + "unique_instructions": 75, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 450.5528455284553, + "max_changed_instruction_length": 974, + "unique_changed_instructions": 123, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "eng-fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, 
"number_of_characters": 129597567, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "unique_docs": 41189, + "min_query_length": 34, "average_query_length": 80.075, + "max_query_length": 124, + "unique_queries": 40, + "min_instruction_length": 150, "average_instruction_length": 396.875, + "max_instruction_length": 887, + "unique_instructions": 40, + "min_changed_instruction_length": 205, "average_changed_instruction_length": 463.175, + "max_changed_instruction_length": 974, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109522175, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "unique_docs": 39326, + "min_query_length": 32, "average_query_length": 81.875, + "max_query_length": 173, + "unique_queries": 40, + "min_instruction_length": 93, "average_instruction_length": 371.125, + "max_instruction_length": 887, + "unique_instructions": 40, + "min_changed_instruction_length": 180, "average_changed_instruction_length": 431.8, + "max_changed_instruction_length": 957, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "eng-zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44534357, + "min_document_length": 74, "average_document_length": 
1082.0501215953307, + "max_document_length": 23840, + "unique_docs": 41120, + "min_query_length": 32, "average_query_length": 83.55813953488372, + "max_query_length": 159, + "unique_queries": 43, + "min_instruction_length": 157, "average_instruction_length": 401.0232558139535, + "max_instruction_length": 731, + "unique_instructions": 43, + "min_changed_instruction_length": 209, "average_changed_instruction_length": 456.25581395348837, + "max_changed_instruction_length": 822, + "unique_changed_instructions": 43, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json index 893dfde705..54ae5d1ec2 100644 --- a/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json +++ b/mteb/descriptive_stats/Retrieval/mFollowIRInstructionRetrieval.json @@ -4,48 +4,112 @@ "num_docs": 121635, "num_queries": 123, "number_of_characters": 283622456, + "min_document_length": 74, "average_document_length": 2331.0777818884367, + "max_document_length": 24179, + "unique_docs": 121635, + "min_query_length": 10, "average_query_length": 57.113821138211385, + "max_query_length": 136, + "unique_queries": 123, + "min_instruction_length": 37, "average_instruction_length": 281.0650406504065, + "max_instruction_length": 1009, + "unique_instructions": 123, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 326.9430894308943, + "max_changed_instruction_length": 1083, + "unique_changed_instructions": 123, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 10.43089430894309, + "max_average_relevant_docs_per_query": 24, + 
"min_average_top_ranked_per_query": 1000, "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000, "hf_subset_descriptive_stats": { "fas": { "num_samples": 41229, "num_docs": 41189, "num_queries": 40, "number_of_characters": 129593838, + "min_document_length": 99, "average_document_length": 3145.4990895627475, + "max_document_length": 24179, + "unique_docs": 41189, + "min_query_length": 34, "average_query_length": 72.65, + "max_query_length": 124, + "unique_queries": 40, + "min_instruction_length": 121, "average_instruction_length": 358.925, + "max_instruction_length": 759, + "unique_instructions": 40, + "min_changed_instruction_length": 163, "average_changed_instruction_length": 415.325, + "max_changed_instruction_length": 842, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.85, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 22, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 }, "rus": { "num_samples": 39366, "num_docs": 39326, "num_queries": 40, "number_of_characters": 109523683, + "min_document_length": 75, "average_document_length": 2784.0813456746173, + "max_document_length": 24061, + "unique_docs": 39326, + "min_query_length": 26, "average_query_length": 77.5, + "max_query_length": 136, + "unique_queries": 40, + "min_instruction_length": 78, "average_instruction_length": 387.0, + "max_instruction_length": 1009, + "unique_instructions": 40, + "min_changed_instruction_length": 187, "average_changed_instruction_length": 458.0, + "max_changed_instruction_length": 1083, + "unique_changed_instructions": 40, + "min_average_relevant_docs_per_query": 0, "average_relevant_docs_per_query": 9.775, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 
1000.0, + "max_average_top_ranked_per_query": 1000 }, "zho": { "num_samples": 41163, "num_docs": 41120, "num_queries": 43, "number_of_characters": 44504935, + "min_document_length": 74, "average_document_length": 1082.0501215953307, + "max_document_length": 23840, + "unique_docs": 41120, + "min_query_length": 10, "average_query_length": 23.697674418604652, + "max_query_length": 44, + "unique_queries": 43, + "min_instruction_length": 37, "average_instruction_length": 110.09302325581395, + "max_instruction_length": 209, + "unique_instructions": 43, + "min_changed_instruction_length": 44, "average_changed_instruction_length": 122.81395348837209, + "max_changed_instruction_length": 229, + "unique_changed_instructions": 43, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 10.651162790697674, - "average_top_ranked_per_query": 1000.0 + "max_average_relevant_docs_per_query": 24, + "min_average_top_ranked_per_query": 1000, + "average_top_ranked_per_query": 1000.0, + "max_average_top_ranked_per_query": 1000 } } } diff --git a/mteb/descriptive_stats/STS/STS12.json b/mteb/descriptive_stats/STS/STS12.json index e9205c1727..a7e11197ac 100644 --- a/mteb/descriptive_stats/STS/STS12.json +++ b/mteb/descriptive_stats/STS/STS12.json @@ -2,8 +2,16 @@ "test": { "num_samples": 3108, "number_of_characters": 402118, + "min_sentence1_length": 3, "average_sentence1_len": 63.78893178893179, + "max_sentence1_length": 220, + "unique_sentence1": 2236, + "min_sentence2_length": 7, "average_sentence2_len": 65.5926640926641, - "avg_score": 3.5060643500643507 + "max_sentence2_length": 204, + "unique_sentence2": 2797, + "min_score": 0.0, + "avg_score": 3.5060643500643507, + "max_score": 5.0 } } \ No newline at end of file diff --git a/mteb/descriptive_stats/STS/STS17.json b/mteb/descriptive_stats/STS/STS17.json index 164cc9d1e6..912738035b 100644 --- a/mteb/descriptive_stats/STS/STS17.json +++ b/mteb/descriptive_stats/STS/STS17.json @@ -2,86 +2,182 @@ "test": { 
"num_samples": 5346, "number_of_characters": 400264, + "min_sentence1_length": 6, "average_sentence1_len": 38.14665170220726, + "max_sentence1_length": 976, + "unique_sentence1": 4900, + "min_sentence2_length": 6, "average_sentence2_len": 36.72502805836139, + "max_sentence2_length": 1007, + "unique_sentence2": 4470, + "min_score": 0.0, "avg_score": 2.3554804214989464, + "max_score": 5.0, "hf_subset_descriptive_stats": { "ko-ko": { "num_samples": 2846, "number_of_characters": 183387, + "min_sentence1_length": 6, "average_sentence1_len": 31.991918482080113, + "max_sentence1_length": 976, + "unique_sentence1": 2650, + "min_sentence2_length": 6, "average_sentence2_len": 32.44483485593816, - "avg_score": 2.469359920356055 + "max_sentence2_length": 1007, + "unique_sentence2": 2720, + "min_score": 0.0, + "avg_score": 2.469359920356055, + "max_score": 5.0 }, "ar-ar": { "num_samples": 250, "number_of_characters": 16247, + "min_sentence1_length": 11, "average_sentence1_len": 32.208, + "max_sentence1_length": 99, + "unique_sentence1": 250, + "min_sentence2_length": 9, "average_sentence2_len": 32.78, - "avg_score": 2.216800000000001 + "max_sentence2_length": 83, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.216800000000001, + "max_score": 5.0 }, "en-ar": { "num_samples": 250, "number_of_characters": 18764, + "min_sentence1_length": 13, "average_sentence1_len": 42.36, + "max_sentence1_length": 105, + "unique_sentence1": 250, + "min_sentence2_length": 10, "average_sentence2_len": 32.696, - "avg_score": 2.1423999999999994 + "max_sentence2_length": 104, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1423999999999994, + "max_score": 5.0 }, "en-de": { "num_samples": 250, "number_of_characters": 22177, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 44.756, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 104, + 
"unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "en-en": { "num_samples": 250, "number_of_characters": 21669, + "min_sentence1_length": 12, "average_sentence1_len": 43.952, + "max_sentence1_length": 94, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "en-tr": { "num_samples": 250, "number_of_characters": 20879, + "min_sentence1_length": 15, "average_sentence1_len": 41.916, + "max_sentence1_length": 101, + "unique_sentence1": 250, + "min_sentence2_length": 10, "average_sentence2_len": 41.6, - "avg_score": 2.1335999999999986 + "max_sentence2_length": 107, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1335999999999986, + "max_score": 5.0 }, "es-en": { "num_samples": 250, "number_of_characters": 23216, + "min_sentence1_length": 12, "average_sentence1_len": 50.84, + "max_sentence1_length": 160, + "unique_sentence1": 250, + "min_sentence2_length": 14, "average_sentence2_len": 42.024, - "avg_score": 2.1464000000000003 + "max_sentence2_length": 117, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.1464000000000003, + "max_score": 5.0 }, "es-es": { "num_samples": 250, "number_of_characters": 25265, + "min_sentence1_length": 18, "average_sentence1_len": 49.836, + "max_sentence1_length": 136, + "unique_sentence1": 250, + "min_sentence2_length": 13, "average_sentence2_len": 51.224, - "avg_score": 2.2312000000000007 + "max_sentence2_length": 129, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2312000000000007, + "max_score": 5.0 }, "fr-en": { "num_samples": 250, "number_of_characters": 23087, + "min_sentence1_length": 19, "average_sentence1_len": 49.624, + "max_sentence1_length": 115, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 
42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "it-en": { "num_samples": 250, "number_of_characters": 23188, + "min_sentence1_length": 15, "average_sentence1_len": 50.028, + "max_sentence1_length": 113, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 }, "nl-en": { "num_samples": 250, "number_of_characters": 22385, + "min_sentence1_length": 14, "average_sentence1_len": 46.816, + "max_sentence1_length": 123, + "unique_sentence1": 250, + "min_sentence2_length": 15, "average_sentence2_len": 42.724, - "avg_score": 2.2776000000000014 + "max_sentence2_length": 101, + "unique_sentence2": 250, + "min_score": 0.0, + "avg_score": 2.2776000000000014, + "max_score": 5.0 } } } diff --git a/mteb/descriptive_stats/Summarization/SummEval.json b/mteb/descriptive_stats/Summarization/SummEval.json index e9c0b172e8..4c2f133abb 100644 --- a/mteb/descriptive_stats/Summarization/SummEval.json +++ b/mteb/descriptive_stats/Summarization/SummEval.json @@ -2,9 +2,54 @@ "test": { "num_samples": 100, "number_of_characters": 212735, - "avg_text_len": 2100.35, - "avg_human_summaries_len": 11.0, - "avg_machine_summaries_len": 16.0, - "avg_relevance": 3.7770833333333336 + "min_text_length": 626, + "avg_text_length": 2100.35, + "max_text_length": 3153, + "unique_texts": 100, + "min_human_summaries_length": 11, + "avg_human_summaries_length": 11.0, + "max_human_summaries_length": 11, + "unique_human_summaries": 1100, + "min_machine_summaries_length": 16, + "avg_machine_summaries_length": 16.0, + "max_machine_summaries_length": 16, + "unique_machine_summaries": 1548, + "min_relevance": [ + 1.0, + 1.3333333333333333, + 3.6666666666666665, + 2.3333333333333335, + 
3.6666666666666665, + 3.0, + 4.333333333333333, + 4.0, + 2.6666666666666665, + 4.0, + 2.0, + 4.666666666666667, + 4.333333333333333, + 1.0, + 2.0, + 1.0 + ], + "avg_relevance": 3.7770833333333336, + "max_relevance": [ + 5.0, + 4.666666666666667, + 4.333333333333333, + 2.6666666666666665, + 4.666666666666667, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 4.0, + 4.333333333333333, + 4.666666666666667, + 4.666666666666667, + 4.333333333333333, + 2.3333333333333335, + 4.666666666666667, + 4.666666666666667 + ] } } \ No newline at end of file diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 2ac0096da8..489b67ab43 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -52,10 +52,25 @@ class MockClassificationTask(AbsTaskClassification): "test": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, - } + }, + "train": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, } metadata = TaskMetadata( @@ -66,20 +81,22 @@ class MockClassificationTask(AbsTaskClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] + labels = [0, 1] self.dataset = DatasetDict( { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -93,26 +110,73 @@ class 
MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask "test": { "num_samples": 4, "number_of_characters": 104, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 2}, "1": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 52, + "num_texts_in_train": 1, + "min_text_length": 23, "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, "unique_labels": 2, "labels": {"0": {"count": 1}, "1": {"count": 1}}, }, }, - } + }, + "train": { + "num_samples": 4, + "number_of_characters": 106, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "fra": { + "num_samples": 2, + "number_of_characters": 53, + "num_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + }, + }, } metadata = TaskMetadata( @@ -124,18 +188,19 @@ class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another 
test sentence"] + train_texts = ["This is a test sentence", "This is another train sentence"] + test_texts = ["This is a test sentence", "This is another test sentence"] labels = [0, 1] data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -153,10 +218,17 @@ def load_data(self, **kwargs): class MockBitextMiningTask(AbsTaskBitextMining): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, } } @@ -190,22 +262,43 @@ def load_data(self, **kwargs): class MockMultilingualBitextMiningTask(AbsTaskBitextMining, MultilingualTask): expected_stats = { "test": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "hf_subset_descriptive_stats": { "eng": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, - }, - "fra": { + "unique_pairs": 2, + "min_sentence1_length": 23, "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + }, + "fra": { "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + 
"average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, }, } @@ -246,22 +339,43 @@ class MockMultilingualParallelBitextMiningTask(AbsTaskBitextMining, Multilingual parallel_subsets = True expected_stats = { "test": { - "average_sentence1_length": 28.25, - "average_sentence2_length": 28.25, "num_samples": 4, "number_of_characters": 226, + "unique_pairs": 4, + "min_sentence1_length": 23, + "average_sentence1_length": 28.25, + "max_sentence1_length": 37, + "unique_sentence1": 4, + "min_sentence2_length": 23, + "average_sentence2_length": 28.25, + "max_sentence2_length": 37, + "unique_sentence2": 4, "hf_subset_descriptive_stats": { "eng_Latn-fra_Latn": { - "average_sentence1_length": 26.0, - "average_sentence2_length": 30.5, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 23, + "average_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "average_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, }, "fra_Latn-eng_Latn": { - "average_sentence1_length": 30.5, - "average_sentence2_length": 26.0, "num_samples": 2, "number_of_characters": 113, + "unique_pairs": 2, + "min_sentence1_length": 24, + "average_sentence1_length": 30.5, + "max_sentence1_length": 37, + "unique_sentence1": 2, + "min_sentence2_length": 23, + "average_sentence2_length": 26.0, + "max_sentence2_length": 29, + "unique_sentence2": 2, }, }, } @@ -303,8 +417,13 @@ class MockClusteringTask(AbsTaskClustering): "test": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": 
{"count": 1}}, } @@ -345,24 +464,39 @@ class MockMultilingualClusteringTask(AbsTaskClustering, MultilingualTask): "test": { "num_samples": 2, "number_of_characters": 6, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 2, "average_labels_per_text": 3.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 1, "number_of_characters": 3, + "min_text_length": 3, "average_text_length": 3.0, + "max_text_length": 3, + "unique_texts": 3, + "min_labels_per_text": 1, "average_labels_per_text": 3.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -412,8 +546,12 @@ class MockClusteringFastTask(AbsTaskClusteringFast): "test": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, } @@ -454,24 +592,36 @@ class MockMultilingualClusteringFastTask(AbsTaskClusteringFast, MultilingualTask "test": { "num_samples": 6, "number_of_characters": 162, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 2, "average_labels_per_text": 1.0, + "max_labels_per_text": 2, "unique_labels": 3, "labels": {"0": {"count": 2}, "1": {"count": 2}, "2": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 
3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, "fra": { "num_samples": 3, "number_of_characters": 81, + "min_text_length": 23, "average_text_length": 27.0, + "max_text_length": 29, + "min_labels_per_text": 1, "average_labels_per_text": 1.0, + "max_labels_per_text": 1, "unique_labels": 3, "labels": {"0": {"count": 1}, "1": {"count": 1}, "2": {"count": 1}}, }, @@ -517,8 +667,14 @@ class MockPairClassificationTask(AbsTaskPairClassification): "test": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, } @@ -562,24 +718,42 @@ class MockMultilingualPairClassificationTask( "test": { "num_samples": 4, "number_of_characters": 226, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 2}, "0": {"count": 2}}, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, 
"labels": {"1": {"count": 1}, "0": {"count": 1}}, }, "fra": { "num_samples": 2, "number_of_characters": 113, - "avg_sentence1_len": 26.0, - "avg_sentence2_len": 30.5, + "min_sentence1_length": 23, + "avg_sentence1_length": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, + "avg_sentence2_length": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, "unique_labels": 2, "labels": {"1": {"count": 1}, "0": {"count": 1}}, }, @@ -627,9 +801,17 @@ class MockSTSTask(AbsTaskSTS): "test": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, } } @@ -674,23 +856,47 @@ class MockMultilingualSTSTask(AbsTaskSTS, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 226, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 0.5, + "max_score": 1, }, "fra": { "num_samples": 2, "number_of_characters": 113, + "min_sentence1_length": 23, "average_sentence1_len": 26.0, + "max_sentence1_length": 29, + "unique_sentence1": 2, + "min_sentence2_length": 24, "average_sentence2_len": 30.5, + "max_sentence2_length": 37, + "unique_sentence2": 2, + "min_score": 0, "avg_score": 
0.5, + "max_score": 1, }, }, } @@ -742,10 +948,21 @@ class MockSummarizationTask(AbsTaskSummarization): "test": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], } } @@ -795,26 +1012,59 @@ class MockMultilingualSummarizationTask(AbsTaskSummarization, MultilingualTask): "test": { "num_samples": 4, "number_of_characters": 120, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + 
"min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, "fra": { "num_samples": 2, "number_of_characters": 60, - "avg_text_len": 26.0, - "avg_human_summaries_len": 2.0, - "avg_machine_summaries_len": 2.0, + "min_text_length": 23, + "avg_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_human_summaries_length": 2, + "avg_human_summaries_length": 2.0, + "max_human_summaries_length": 2, + "unique_human_summaries": 2, + "min_machine_summaries_length": 2, + "avg_machine_summaries_length": 2.0, + "max_machine_summaries_length": 2, + "unique_machine_summaries": 2, + "min_relevance": [0, 1], "avg_relevance": 0.5, + "max_relevance": [1, 0], }, }, } @@ -872,9 +1122,18 @@ class MockRerankingTask(AbsTaskReranking): "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, } } @@ -917,27 +1176,54 @@ class MockMultilingualRerankingTask(AbsTaskReranking, MultilingualTask): "number_of_characters": 344, "num_positive": 4, "num_negative": 4, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, "hf_subset_descriptive_stats": { "eng": { "num_samples": 2, "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - 
"avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, }, "fra": { "num_samples": 2, "number_of_characters": 172, "num_positive": 2, "num_negative": 2, - "avg_query_len": 26.0, - "avg_positive_len": 30.0, - "avg_negative_len": 30.0, + "min_query_length": 23, + "avg_query_length": 26.0, + "max_query_length": 29, + "unique_query": 2, + "min_positive_length": 27, + "avg_positive_length": 30.0, + "max_positive_length": 33, + "unique_positive": 2, + "min_negative_length": 27, + "avg_negative_length": 30.0, + "max_negative_length": 33, + "unique_negative": 2, }, }, } @@ -982,13 +1268,22 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): expected_stats = { "test": { + "number_of_characters": 112, "num_samples": 4, - "number_of_characters": 56.0, - "average_document_length": 15.0, - "average_query_length": 13.0, - "num_documents": 2, "num_queries": 2, - "average_relevant_docs_per_query": 1.0, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, } } @@ -1025,31 +1320,58 @@ def load_data(self, **kwargs): class MockMultilingualRetrievalTask(AbsTaskRetrieval, MultilingualTask): expected_stats = { "test": { - "number_of_characters": 56.0, + "number_of_characters": 224, "num_samples": 8, "num_queries": 4, "num_documents": 4, - "average_document_length": 7.5, - "average_query_length": 6.5, - "average_relevant_docs_per_query": 1.0, 
+ "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 4, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 4, "hf_subset_descriptive_stats": { "eng": { - "number_of_characters": 56.0, + "number_of_characters": 112, "num_samples": 4, "num_queries": 2, "num_documents": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_relevant_docs_per_query": 1.0, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, }, "fra": { - "number_of_characters": 56.0, + "number_of_characters": 112, "num_samples": 4, "num_queries": 2, "num_documents": 2, - "average_document_length": 15.0, - "average_query_length": 13.0, - "average_relevant_docs_per_query": 1.0, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, }, }, } @@ -1095,13 +1417,33 @@ def load_data(self, **kwargs): class MockMultilabelClassification(AbsTaskMultilabelClassification): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + 
"min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "train": { "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, - } + }, } metadata = TaskMetadata( @@ -1112,20 +1454,21 @@ class MockMultilabelClassification(AbsTaskMultilabelClassification): ) def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 self.dataset = DatasetDict( { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1139,31 +1482,93 @@ class MockMultilingualMultilabelClassification( ): expected_stats = { "test": { - "average_text_length": 26.0, + "num_samples": 12, "number_of_characters": 312, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 12, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 12}, "1": {"count": 12}}, "hf_subset_descriptive_stats": { "eng": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, - "num_samples": 6, + 
"max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, "fra": { - "average_text_length": 26.0, + "num_samples": 6, "number_of_characters": 156, + "number_texts_in_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + }, + }, + "train": { + "num_samples": 12, + "number_of_characters": 318, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "fra": { "num_samples": 6, + "number_of_characters": 159, + "number_texts_in_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, "unique_labels": 2, "labels": {"0": {"count": 6}, "1": {"count": 6}}, }, }, - } + }, } metadata = TaskMetadata( @@ -1175,19 +1580,20 @@ class MockMultilingualMultilabelClassification( metadata.eval_langs = multilingual_eval_langs def load_data(self, **kwargs): - texts = ["This is a test sentence", "This is another test sentence"] * 3 + train_texts = ["This is a test sentence", "This is another train sentence"] * 3 + test_texts = ["This is a test 
sentence", "This is another test sentence"] * 3 labels = [[0, 1], [1, 0]] * 3 data = { "test": Dataset.from_dict( { - "text": texts, + "text": test_texts, "label": labels, } ), "train": Dataset.from_dict( { - "text": texts, + "text": train_texts, "label": labels, } ), @@ -1206,16 +1612,32 @@ class MockInstructionRetrival(AbsTaskInstructionRetrieval): do_length_ablation = True expected_stats = { "test": { - "average_changed_instruction_length": 37.0, + "num_samples": 4, + "num_docs": 2, + "num_queries": 2, + "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, - "average_instruction_length": 29.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, + "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, + "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, - "num_docs": 2, - "num_queries": 2, - "num_samples": 4, - "number_of_characters": 244, + "max_average_top_ranked_per_query": 2, } } @@ -1297,36 +1719,84 @@ class MockMultilingualInstructionRetrival( "num_docs": 4, "num_queries": 4, "number_of_characters": 488, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + 
"max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + "max_average_top_ranked_per_query": 2, "hf_subset_descriptive_stats": { "eng": { "num_samples": 4, "num_docs": 2, "num_queries": 2, "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + "max_average_top_ranked_per_query": 2, }, "fra": { "num_samples": 4, "num_docs": 2, "num_queries": 2, "number_of_characters": 244, + "min_document_length": 27, "average_document_length": 30.0, + "max_document_length": 33, + "unique_docs": 2, + "min_query_length": 23, "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "min_instruction_length": 26, "average_instruction_length": 29.0, + "max_instruction_length": 32, + "unique_instructions": 2, + "min_changed_instruction_length": 34, "average_changed_instruction_length": 37.0, + "max_changed_instruction_length": 40, + "unique_changed_instructions": 2, + "min_average_relevant_docs_per_query": 1, "average_relevant_docs_per_query": 1.0, + "max_average_relevant_docs_per_query": 1, + "min_average_top_ranked_per_query": 2, "average_top_ranked_per_query": 2.0, + 
"max_average_top_ranked_per_query": 2, }, }, } diff --git a/tests/test_tasks/test_metadata.py b/tests/test_tasks/test_metadata.py index 1e7e1b24df..3d206da5c8 100644 --- a/tests/test_tasks/test_metadata.py +++ b/tests/test_tasks/test_metadata.py @@ -8,9 +8,9 @@ @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) def test_descriptive_stats(task): result_stat = task.calculate_metadata_metrics() + # remove descriptive task file + task.metadata.descriptive_stat_path.unlink() task_stat = task.expected_stats for key, value in result_stat.items(): assert key in task_stat assert value == task_stat[key] - # remove descriptive task file - task.metadata.descriptive_stat_path.unlink() From 04ac3f21139db2ea50fdef4d91c345f61f229d44 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 14 Nov 2024 11:53:39 +0100 Subject: [PATCH 02/76] fix: update task metadata to allow for null (#1448) --- mteb/leaderboard/figures.py | 10 ++++++---- mteb/leaderboard/table.py | 2 +- mteb/load_results/task_results.py | 7 +++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mteb/leaderboard/figures.py b/mteb/leaderboard/figures.py index e8419d9a31..7a354f7c82 100644 --- a/mteb/leaderboard/figures.py +++ b/mteb/leaderboard/figures.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy as np import pandas as pd import plotly.express as px @@ -68,7 +70,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: hover_name="Model", ) fig.update_layout( - coloraxis_colorbar=dict( + coloraxis_colorbar=dict( # noqa title="Max Tokens", tickvals=[2, 3, 4, 5], ticktext=[ @@ -78,7 +80,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: "100K", ], ), - hoverlabel=dict( + hoverlabel=dict( # noqa bgcolor="white", font_size=16, ), @@ -87,7 +89,7 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: textposition="top center", ) fig.update_layout( - font=dict(size=16, color="black"), - margin=dict(b=20, t=10, l=20, r=10), + font=dict(size=16, 
color="black"), # noqa + margin=dict(b=20, t=10, l=20, r=10), # noqa ) return fig diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index 034b33b4f8..d9b830d236 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -88,7 +88,7 @@ def get_means_per_types(df: pd.DataFrame) -> pd.DataFrame: [name_to_score.get(task_name, np.nan) for task_name in task_names] ) records.append( - dict( + dict( # noqa model_name=model_name, model_revision=model_revision, task_type=task_type, diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index ce2e979654..202ed9b5f5 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -156,9 +156,9 @@ class TaskResult(BaseModel): dataset_revision: str task_name: str - mteb_version: str + mteb_version: str | None scores: dict[Split, list[ScoresDict]] - evaluation_time: float + evaluation_time: float | None kg_co2_emissions: float | None = None @classmethod @@ -290,6 +290,9 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: f"Error loading TaskResult from disk. You can try to load historic data by setting `load_historic_data=True`. Error: {e}" ) + if data["mteb_version"] is None: + data.pop("mteb_version") + pre_1_11_load = ( ( "mteb_version" in data From f6a49fef74724bed1a7e19d6b895324ed25cff13 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 14 Nov 2024 10:56:10 +0000 Subject: [PATCH 03/76] Update tasks table --- docs/tasks.md | 100 +++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/docs/tasks.md b/docs/tasks.md index 164daf7102..fd61d8af39 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -25,24 +25,24 @@ The following tables give you an overview of the tasks in MTEB. 
| [AmazonPolarityClassification](https://huggingface.co/datasets/amazon_polarity) (Julian McAuley, 2013) | ['eng'] | Classification | p2p | [Reviews, Written] | None | None | | [AmazonReviewsClassification](https://arxiv.org/abs/2010.02573) (Phillip Keung, 2020) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'spa'] | Classification | s2s | [Reviews, Written] | None | None | | [AngryTweetsClassification](https://aclanthology.org/2021.nodalida-main.53/) (Pauli et al., 2021) | ['dan'] | Classification | s2s | [Social, Written] | None | None | -| [AppsRetrieval](https://arxiv.org/abs/2105.09938) (Dan Hendrycks, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 12530} | {'test': {'number_of_characters': 2245.84, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'average_document_length': 0.07, 'average_query_length': 0.44, 'average_relevant_docs_per_query': 1.0}} | +| [AppsRetrieval](https://arxiv.org/abs/2105.09938) (Dan Hendrycks, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 12530} | {'test': {'number_of_characters': 11335620, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'min_document_length': 152, 'average_document_length': 717.27, 'max_document_length': 5742, 'unique_documents': 8765, 'min_query_length': 6, 'average_query_length': 1340.96, 'max_query_length': 289049, 'unique_queries': 3765, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3765}} | | [ArEntail](https://link.springer.com/article/10.1007/s10579-024-09731-1) (Obeidat et al., 2024) | ['ara'] | PairClassification | s2s | [News, Written] | None | None | -| [ArXivHierarchicalClusteringP2P](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'average_text_length': 1008.44, 'average_labels_per_text': 1.46, 
'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 
3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} | +| [ArXivHierarchicalClusteringP2P](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'min_text_length': 103, 'average_text_length': 1008.44, 'max_text_length': 2103, 'min_labels_per_text': 1, 'average_labels_per_text': 1.46, 'max_labels_per_text': 381, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': 
{'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': 
{'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} | | [ArXivHierarchicalClusteringS2S](https://www.kaggle.com/Cornell-University/arxiv) | ['eng'] | Clustering | p2p | [Academic, Written] | None | None | | [ArguAna](http://argumentation.bplaced.net/arguana/data) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Written] | None | None | | [ArguAna-PL](https://huggingface.co/datasets/clarin-knext/arguana-pl) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [ArmenianParaphrasePC](https://github.com/ivannikov-lab/arpa-paraphrase-corpus) (Arthur Malajyan, 2020) | ['hye'] | PairClassification | s2s | [News, Written] | None | None | | [ArxivClassification](https://ieeexplore.ieee.org/document/8675939) (He et al., 2019) | ['eng'] | Classification | s2s | [Academic, Written] | None | None | -| [AskUbuntuDupQuestions](https://github.com/taolei87/askubuntu) | ['eng'] | Reranking | s2s | | {'test': 375} | {'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'avg_query_len': 50.21, 'avg_positive_len': 52.54, 'avg_negative_len': 52.69}} | +| [AskUbuntuDupQuestions](https://github.com/taolei87/askubuntu) | ['eng'] | Reranking | s2s | | {'test': 375} | {'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'min_query_length': 17, 'avg_query_length': 50.21, 'max_query_length': 148, 'unique_query': 374, 'min_positive_length': 15, 'avg_positive_length': 52.54, 'max_positive_length': 152, 'unique_positive': 2165, 'min_negative_length': 15, 'avg_negative_length': 52.69, 'max_negative_length': 148, 'unique_negative': 5002}} | | [Assin2RTE](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) (Real et al., 2020) | ['por'] | PairClassification | s2s | [Written] | None | None | | 
[Assin2STS](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) (Real et al., 2020) | ['por'] | STS | s2s | [Written] | None | None | | [AutoRAGRetrieval](https://arxiv.org/abs/2410.20878) (Dongkyu Kim, 2024) | ['kor'] | Retrieval | s2p | [Government, Medical, Legal, Social] | {'test': 834} | {'test': {'number_of_characters': 894.22, 'num_samples': 834, 'num_queries': 114, 'num_documents': 720, 'average_document_length': 1.15, 'average_query_length': 0.61, 'average_relevant_docs_per_query': 1.0}} | | [BIOSSES](https://tabilab.cmpe.boun.edu.tr/BIOSSES/DataSet.html) (Soğancıoğlu et al., 2017) | ['eng'] | STS | s2s | | None | None | | [BQ](https://aclanthology.org/2021.emnlp-main.357) (Shitao Xiao, 2024) | ['cmn'] | STS | s2s | | None | None | | [BSARDRetrieval](https://huggingface.co/datasets/maastrichtlawtech/bsard) (Louis et al., 2022) | ['fra'] | Retrieval | s2p | [Legal, Spoken] | None | None | -| [BUCC.v2](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) | ['cmn', 'deu', 'eng', 'fra', 'rus'] | BitextMining | s2s | [Written] | None | None | +| [BUCC.v2](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) | ['cmn', 'deu', 'eng', 'fra', 'rus'] | BitextMining | s2s | [Written] | {'test': 35000} | {'test': {'num_samples': 35000, 'number_of_characters': 6640032, 'unique_pairs': 34978, 'min_sentence1_length': 16, 'average_sentence1_length': 99.11, 'max_sentence1_length': 204, 'unique_sentence1': 34978, 'min_sentence2_length': 42, 'average_sentence2_length': 90.61, 'max_sentence2_length': 159, 'unique_sentence2': 25306, 'hf_subset_descriptive_stats': {'de-en': {'num_samples': 9580, 'number_of_characters': 1919197, 'unique_pairs': 9573, 'min_sentence1_length': 50, 'average_sentence1_length': 109.08, 'max_sentence1_length': 204, 'unique_sentence1': 9573, 'min_sentence2_length': 46, 'average_sentence2_length': 91.25, 'max_sentence2_length': 155, 'unique_sentence2': 9570}, 'fr-en': {'num_samples': 9086, 'number_of_characters': 1677545, 
'unique_pairs': 9081, 'min_sentence1_length': 43, 'average_sentence1_length': 99.32, 'max_sentence1_length': 174, 'unique_sentence1': 9081, 'min_sentence2_length': 42, 'average_sentence2_length': 85.31, 'max_sentence2_length': 159, 'unique_sentence2': 9076}, 'ru-en': {'num_samples': 14435, 'number_of_characters': 2808206, 'unique_pairs': 14425, 'min_sentence1_length': 40, 'average_sentence1_length': 101.66, 'max_sentence1_length': 186, 'unique_sentence1': 14425, 'min_sentence2_length': 45, 'average_sentence2_length': 92.88, 'max_sentence2_length': 159, 'unique_sentence2': 14424}, 'zh-en': {'num_samples': 1899, 'number_of_characters': 235084, 'unique_pairs': 1899, 'min_sentence1_length': 16, 'average_sentence1_length': 28.43, 'max_sentence1_length': 40, 'unique_sentence1': 1899, 'min_sentence2_length': 48, 'average_sentence2_length': 95.36, 'max_sentence2_length': 159, 'unique_sentence2': 1899}}}} | | [Banking77Classification](https://arxiv.org/abs/2003.04807) | ['eng'] | Classification | s2s | [Written] | None | None | -| [BelebeleRetrieval](https://arxiv.org/abs/2308.16884) (Lucas Bandarkar, 2023) | ['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] | Retrieval | s2p | [Web, News, Written] | {'test': 521866} | {'test': 
{'number_of_characters': 76.5, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'average_document_length': 0.0, 'average_query_length': 0.0, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'acm_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 
'eng_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Arab': 
{'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-ary_Arab': {'number_of_characters': 68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ary_Arab': {'number_of_characters': 
68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 
0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 
0.1, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 
1.0}, 'eng_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-pbt_Arab': {'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pbt_Arab': 
{'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-pol_Latn': {'number_of_characters': 76.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pol_Latn': {'number_of_characters': 76.1, 
'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 
0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 
0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'tam_Taml-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 
'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'wol_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'zho_Hans-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'zul_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}}}} | +| [BelebeleRetrieval](https://arxiv.org/abs/2308.16884) (Lucas Bandarkar, 2023) | ['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 
'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] | Retrieval | s2p | [Web, News, Written] | {'test': 521866} | {'test': {'number_of_characters': 25574620, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'min_document_length': 4, 'average_document_length': 137.38, 'max_document_length': 237, 'unique_documents': 183488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 338378, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 183488, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'acm_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-acm_Arab': {'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 
2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-amh_Ethi': 
{'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 
'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-bam_Latn': {'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bam_Latn': {'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-bod_Tibt': 
{'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bod_Tibt': {'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-cat_Latn': {'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-cat_Latn': {'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ckb_Arab': 
{'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-fin_Latn': 
{'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fin_Latn': {'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fuv_Latn': {'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-guj_Gujr': 
{'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 
'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ibo_Latn-ibo_Latn': {'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ibo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ibo_Latn': {'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ilo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-isl_Latn': 
{'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-isl_Latn': {'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-jav_Latn': {'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jav_Latn': {'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-kan_Knda': {'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kan_Knda': 
{'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 
'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kin_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kin_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 
'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-lao_Laoo': {'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lao_Laoo': {'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lin_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lit_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lit_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lug_Latn-lug_Latn': 
{'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lug_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lug_Latn': {'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'luo_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'luo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lvs_Latn': {'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mar_Deva-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mar_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mkd_Cyrl': 
{'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 
'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 
'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-nya_Latn': {'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nya_Latn': {'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-pbt_Arab': 
{'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pbt_Arab': {'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-plt_Latn': {'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-plt_Latn': {'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-ron_Latn': {'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ron_Latn': 
{'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 
'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 
'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-som_Latn': {'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-som_Latn': {'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'srp_Cyrl-srp_Cyrl': 
{'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ssw_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-sun_Latn': {'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sun_Latn': {'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-tam_Taml': {'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tam_Taml': 
{'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 
'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-eng_Latn': 
{'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 
'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-ukr_Cyrl': {'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-uzn_Latn': 
{'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-uzn_Latn': {'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 
'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-war_Latn': {'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-war_Latn': {'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-yor_Latn': {'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-yor_Latn': 
{'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 
'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 
'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Latn': 
{'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 
'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}}}} | | [BengaliDocumentClassification](https://aclanthology.org/2023.eacl-main.4) | ['ben'] | Classification | s2s | [News, Written] | None | None | | [BengaliHateSpeechClassification](https://huggingface.co/datasets/bn_hate_speech) (Karim et al., 2020) | ['ben'] | 
Classification | s2s | [News, Written] | None | None | | [BengaliSentimentAnalysis](https://data.mendeley.com/datasets/p6zc7krs37/4) (Sazzed et al., 2020) | ['ben'] | Classification | s2s | [Reviews, Written] | None | None | @@ -52,19 +52,19 @@ The following tables give you an overview of the tasks in MTEB. | [BiorxivClusteringS2S.v2](https://api.biorxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Written] | None | None | | [BlurbsClusteringP2P.v2](https://www.inf.uni-hamburg.de/en/inst/ab/lt/resources/data/germeval-2019-hmc.html) (Steffen Remus, 2019) | ['deu'] | Clustering | p2p | [Fiction, Written] | None | None | | [BlurbsClusteringS2S.v2](https://www.inf.uni-hamburg.de/en/inst/ab/lt/resources/data/germeval-2019-hmc.html) (Steffen Remus, 2019) | ['deu'] | Clustering | s2s | [Fiction, Written] | None | None | -| [BornholmBitextMining](https://aclanthology.org/W19-6138/) | ['dan'] | BitextMining | s2s | [Web, Social, Fiction, Written] | {'test': 500} | {'test': {'average_sentence1_length': 49.83, 'average_sentence2_length': 38.89, 'num_samples': 500, 'number_of_characters': 44361}} | +| [BornholmBitextMining](https://aclanthology.org/W19-6138/) | ['dan'] | BitextMining | s2s | [Web, Social, Fiction, Written] | {'test': 500} | {'test': {'num_samples': 500, 'number_of_characters': 44361, 'unique_pairs': 500, 'min_sentence1_length': 1, 'average_sentence1_length': 49.83, 'max_sentence1_length': 555, 'unique_sentence1': 497, 'min_sentence2_length': 5, 'average_sentence2_length': 38.89, 'max_sentence2_length': 453, 'unique_sentence2': 491}} | | [BrazilianToxicTweetsClassification](https://paperswithcode.com/dataset/told-br) (Joao Augusto Leite and Diego F. 
Silva and Kalina Bontcheva and Carolina Scarton, 2020) | ['por'] | MultilabelClassification | s2s | [Constructed, Written] | None | None | | [BrightRetrieval](https://huggingface.co/datasets/xlangai/BRIGHT) (Hongjin Su, 2024) | ['eng'] | Retrieval | s2p | [Non-fiction] | None | None | | [BulgarianStoreReviewSentimentClassfication](https://doi.org/10.7910/DVN/TXIK9P) (Georgieva-Trifonova et al., 2018) | ['bul'] | Classification | s2s | [Reviews, Written] | None | None | | [CBD](http://2019.poleval.pl/files/poleval2019.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [CDSC-E](https://aclanthology.org/P17-1073.pdf) | ['pol'] | PairClassification | s2s | [Written] | None | None | | [CDSC-R](https://aclanthology.org/P17-1073.pdf) | ['pol'] | STS | s2s | [Web, Written] | None | None | -| [CEDRClassification](https://www.sciencedirect.com/science/article/pii/S1877050921013247) (Sboev et al., 2021) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Blog, Written] | {'test': 1882} | {'test': {'average_text_length': 91.21, 'number_of_characters': 171649, 'average_label_per_text': 0.62, 'num_samples': 1882, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}} | +| [CEDRClassification](https://www.sciencedirect.com/science/article/pii/S1877050921013247) (Sboev et al., 2021) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Blog, Written] | {'test': 1882, 'train': 7528} | {'test': {'num_samples': 1882, 'number_of_characters': 171649, 'number_texts_in_train': 7, 'min_text_length': 6, 'average_text_length': 91.21, 'max_text_length': 220, 'unique_texts': 1875, 'min_labels_per_text': 0, 'average_label_per_text': 0.62, 'max_labels_per_text': 2, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}, 'train': {'num_samples': 
7528, 'number_of_characters': 697322, 'number_texts_in_train': None, 'min_text_length': 5, 'average_text_length': 92.63, 'max_text_length': 280, 'unique_texts': 7500, 'min_labels_per_text': 0, 'average_label_per_text': 0.61, 'max_labels_per_text': 3, 'unique_labels': 6, 'labels': {'None': {'count': 3043}, '2': {'count': 607}, '0': {'count': 1569}, '3': {'count': 589}, '1': {'count': 1417}, '4': {'count': 411}}}} | | [CLSClusteringP2P.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | p2p | [Academic, Written] | None | None | | [CLSClusteringS2S.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | s2s | [Academic, Written] | None | None | | [CMedQAv1-reranking](https://github.com/zhangsheng93/cMedQA) (Zhang et al., 2017) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | | [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. Zhang, 2018) | ['cmn'] | Reranking | s2s | | None | None | -| [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 664.77, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 941.4, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 748.83, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'average_document_length': 0.0, 'average_query_length': 0.23, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 405.38, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 
'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 457.44, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'average_document_length': 0.0, 'average_query_length': 0.36, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 588.89, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 578.85, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}}}} | +| [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 36843313, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'min_document_length': 54, 'average_document_length': 34.71, 'max_document_length': 334374, 'unique_documents': 1003765, 'min_query_length': 2, 'average_query_length': 38.19, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 14574651, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'min_document_length': 95, 'average_document_length': 49.99, 'max_document_length': 14008, 'unique_documents': 280310, 'min_query_length': 2, 'average_query_length': 37.58, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 
2587540, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'min_document_length': 87, 'average_document_length': 37.9, 'max_document_length': 334374, 'unique_documents': 64854, 'min_query_length': 2, 'average_query_length': 39.41, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 3641108, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'min_document_length': 54, 'average_document_length': 17.96, 'max_document_length': 5280, 'unique_documents': 182440, 'min_query_length': 2, 'average_query_length': 44.92, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 629446, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'min_document_length': 83, 'average_document_length': 20.83, 'max_document_length': 3992, 'unique_documents': 27570, 'min_query_length': 2, 'average_query_length': 43.73, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 6791137, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'min_document_length': 77, 'average_document_length': 35.55, 'max_document_length': 7615, 'unique_documents': 180866, 'min_query_length': 2, 'average_query_length': 33.02, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 8619431, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'min_document_length': 94, 
'average_document_length': 30.2, 'max_document_length': 4904, 'unique_documents': 267725, 'min_query_length': 2, 'average_query_length': 38.21, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | | [CPUSpeedTask](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/c8376f967d1294419be1d3eb41217d04cd3a65d3/src/seb/registered_tasks/speed.py#L83-L96) | ['eng'] | Speed | s2s | [Fiction, Written] | None | None | | [CQADupstackAndroidRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) | ['eng'] | Retrieval | s2p | | None | None | | [CQADupstackEnglishRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) | ['eng'] | Retrieval | s2p | | None | None | @@ -125,13 +125,13 @@ The following tables give you an overview of the tasks in MTEB. | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | | None | None | | [Cmnli](https://huggingface.co/datasets/clue/viewer/cmnli) | ['cmn'] | PairClassification | s2s | | None | None | -| [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 71.99, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70.52, 'num_samples': 2000, 'num_queries': 
1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 57.88, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'typescript': {'number_of_characters': 61.09, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 71.8, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 67.9, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 63.98, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 62.93, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'c': {'number_of_characters': 98.59, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'c++': {'number_of_characters': 115.48, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'rust': {'number_of_characters': 68.5, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 
1.0}, 'swift': {'number_of_characters': 58.28, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'scala': {'number_of_characters': 65.83, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'shell': {'number_of_characters': 73.06, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 5894.4, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'average_document_length': 0.02, 'average_query_length': 0.33, 'average_relevant_docs_per_query': 1.0}} | -| [CodeFeedbackST](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 187832} | {'test': {'number_of_characters': 2246.58, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'average_document_length': 0.01, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}} | -| [CodeSearchNetCCRetrieval](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1058035} | {'test': {'number_of_characters': 390.06, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 553.79, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'average_document_length': 0.0, 'average_query_length': 0.04, 
'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 445.71, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 235.77, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 268.87, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'average_document_length': 0.0, 'average_query_length': 0.21, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 344.53, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 338.62, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 12000} | {'test': {'number_of_characters': 325.01, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467.55, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.47, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 187.02, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.19, 'average_relevant_docs_per_query': 1.0}, 
'go': {'number_of_characters': 126.21, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 314.82, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.31, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 691.36, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.69, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 163.12, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.16, 'average_relevant_docs_per_query': 1.0}}}} | -| [CodeTransOceanContest](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['c++', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 1229} | {'test': {'number_of_characters': 2520.65, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'average_document_length': 1.5, 'average_query_length': 4.58, 'average_relevant_docs_per_query': 1.0}} | -| [CodeTransOceanDL](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['python'] | Retrieval | p2p | [Programming, Written] | {'test': 996} | {'test': {'number_of_characters': 3347.7, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'average_document_length': 1.81, 'average_query_length': 10.38, 'average_relevant_docs_per_query': 1.0}} | +| [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 935841, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 
'min_document_length': 18, 'average_document_length': 70.99, 'max_document_length': 2532, 'unique_documents': 13000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 13000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70519, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 69.52, 'max_document_length': 1811, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 57880, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 56.88, 'max_document_length': 601, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'typescript': {'number_of_characters': 61092, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 60.09, 'max_document_length': 659, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 71797, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 70.8, 'max_document_length': 1529, 'unique_documents': 1000, 
'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 67900, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 66.9, 'max_document_length': 751, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 63984, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 23, 'average_document_length': 62.98, 'max_document_length': 807, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 62927, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 61.93, 'max_document_length': 766, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c': {'number_of_characters': 98588, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 97.59, 'max_document_length': 1672, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 
1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c++': {'number_of_characters': 115480, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 114.48, 'max_document_length': 1856, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'rust': {'number_of_characters': 68503, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 67.5, 'max_document_length': 2532, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'swift': {'number_of_characters': 58279, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 57.28, 'max_document_length': 727, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'scala': {'number_of_characters': 65833, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 64.83, 'max_document_length': 685, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'shell': {'number_of_characters': 73059, 'num_samples': 2000, 'num_queries': 1000, 
'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 72.06, 'max_document_length': 813, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | +| [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 156266302, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'min_document_length': 127, 'average_document_length': 885.13, 'max_document_length': 32432, 'unique_documents': 66383, 'min_query_length': 2, 'average_query_length': 7344.18, 'max_query_length': 9403, 'unique_queries': 13277, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13277}} | +| [CodeFeedbackST](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 187832} | {'test': {'number_of_characters': 260957682, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'min_document_length': 26, 'average_document_length': 144.85, 'max_document_length': 13851, 'unique_documents': 156526, 'min_query_length': 1, 'average_query_length': 7611.46, 'max_query_length': 11354, 'unique_queries': 31306, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 31306}} | +| [CodeSearchNetCCRetrieval](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1058035} | {'test': {'number_of_characters': 22407915, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'min_document_length': 23, 
'average_document_length': 20.29, 'max_document_length': 214210, 'unique_documents': 1005474, 'min_query_length': 2, 'average_query_length': 38.26, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 8792958, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'min_document_length': 38, 'average_document_length': 29.33, 'max_document_length': 8326, 'unique_documents': 280652, 'min_query_length': 2, 'average_query_length': 37.63, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 1590642, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'min_document_length': 40, 'average_document_length': 22.4, 'max_document_length': 214210, 'unique_documents': 65201, 'min_query_length': 2, 'average_query_length': 39.62, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 2264134, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'min_document_length': 23, 'average_document_length': 10.39, 'max_document_length': 3589, 'unique_documents': 182735, 'min_query_length': 2, 'average_query_length': 45.0, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 391703, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'min_document_length': 36, 'average_document_length': 12.2, 'max_document_length': 2244, 'unique_documents': 
27588, 'min_query_length': 2, 'average_query_length': 43.76, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 4114584, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'min_document_length': 38, 'average_document_length': 20.72, 'max_document_length': 5066, 'unique_documents': 181061, 'min_query_length': 2, 'average_query_length': 33.06, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 5253894, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'min_document_length': 40, 'average_document_length': 17.59, 'max_document_length': 2995, 'unique_documents': 268237, 'min_query_length': 2, 'average_query_length': 38.28, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | +| [CodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 12000} | {'test': {'number_of_characters': 1950074, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'min_document_length': 2, 'average_document_length': 324.01, 'max_document_length': 17533, 'unique_documents': 6000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 6000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 6000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467546, 'num_samples': 2000, 'num_queries': 1000, 
'num_documents': 1000, 'min_document_length': 8, 'average_document_length': 466.55, 'max_document_length': 8636, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 187018, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 186.02, 'max_document_length': 7657, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 126213, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 14, 'average_document_length': 125.21, 'max_document_length': 1501, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 314818, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 313.82, 'max_document_length': 17533, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 691360, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 690.36, 'max_document_length': 6473, 'unique_documents': 1000, 
'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 163119, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 162.12, 'max_document_length': 1240, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | +| [CodeTransOceanContest](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['c++', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 1229} | {'test': {'number_of_characters': 1744286, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'min_document_length': 8, 'average_document_length': 221.9, 'max_document_length': 4147, 'unique_documents': 1008, 'min_query_length': 8, 'average_query_length': 6880.58, 'max_query_length': 10852, 'unique_queries': 221, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 221}} | +| [CodeTransOceanDL](https://arxiv.org/abs/2310.04951) (Weixiang Yan, 2023) | ['python'] | Retrieval | p2p | [Programming, Written] | {'test': 996} | {'test': {'number_of_characters': 1543912, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'min_document_length': 376, 'average_document_length': 411.98, 'max_document_length': 8285, 'unique_documents': 816, 'min_query_length': 58, 'average_query_length': 6709.67, 'max_query_length': 8469, 'unique_queries': 180, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 180}} | | 
[ContractNLIConfidentialityOfAgreementLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLIExplicitIdentificationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -146,9 +146,9 @@ The following tables give you an overview of the tasks in MTEB. | [ContractNLISharingWithEmployeesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLISharingWithThirdPartiesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [ContractNLISurvivalOfObligationsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [Core17InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | {'test': 19919} | {'test': {'num_samples': 19919, 'num_docs': 19899, 'num_queries': 20, 'number_of_characters': 44450333, 'average_document_length': 2233.03, 'average_query_length': 109.75, 'average_instruction_length': 295.55, 'average_changed_instruction_length': 355.2, 'average_relevant_docs_per_query': 32.7, 'average_top_ranked_per_query': 1000.0}} | +| [Core17InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | {'test': 19919} | {'test': {'num_samples': 19919, 'num_docs': 
19899, 'num_queries': 20, 'number_of_characters': 44450333, 'min_document_length': 7, 'average_document_length': 2233.03, 'max_document_length': 2959, 'unique_docs': 19143, 'min_query_length': 55, 'average_query_length': 109.75, 'max_query_length': 278, 'unique_queries': 20, 'min_instruction_length': 102, 'average_instruction_length': 295.55, 'max_instruction_length': 811, 'unique_instructions': 20, 'min_changed_instruction_length': 151, 'average_changed_instruction_length': 355.2, 'max_changed_instruction_length': 837, 'unique_changed_instructions': 20, 'min_average_relevant_docs_per_query': 4, 'average_relevant_docs_per_query': 32.7, 'max_average_relevant_docs_per_query': 55, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}} | | [CorporateLobbyingLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [CosQA](https://arxiv.org/abs/2105.13239) (Junjie Huang, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 21104} | {'test': {'number_of_characters': 313.95, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'average_document_length': 0.01, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}} | +| [CosQA](https://arxiv.org/abs/2105.13239) (Junjie Huang, 2021) | ['eng', 'python'] | Retrieval | p2p | [Programming, Written] | {'test': 21104} | {'test': {'number_of_characters': 5728450, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'min_document_length': 18, 'average_document_length': 0.89, 'max_document_length': 83, 'unique_documents': 20604, 'min_query_length': 88, 'average_query_length': 11420.09, 'max_query_length': 6396, 'unique_queries': 500, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 500}} | | 
[CovidRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | | [CrossLingualSemanticDiscriminationWMT19](https://huggingface.co/datasets/Andrianos/clsd_wmt19_21) | ['deu', 'fra'] | Retrieval | s2s | [News, Written] | None | None | | [CrossLingualSemanticDiscriminationWMT21](https://huggingface.co/datasets/Andrianos/clsd_wmt19_21) | ['deu', 'fra'] | Retrieval | s2s | [News, Written] | None | None | @@ -175,7 +175,7 @@ The following tables give you an overview of the tasks in MTEB. | [Diversity6LegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [DuRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) (Yifu Qiu, 2022) | ['cmn'] | Retrieval | s2p | | None | None | | [DutchBookReviewSentimentClassification](https://github.com/benjaminvdb/DBRD) (Benjamin et al., 2019) | ['nld'] | Classification | s2s | [Reviews, Written] | None | None | -| [ESCIReranking](https://github.com/amazon-science/esci-data/) (Chandan K. 
Reddy, 2022) | ['eng', 'jpn', 'spa'] | Reranking | s2p | [Written] | {'test': 29285} | {'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'avg_query_len': 19.69, 'avg_positive_len': 803.92, 'avg_negative_len': 808.5, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'avg_query_len': 21.44, 'avg_positive_len': 868.37, 'avg_negative_len': 864.45}, 'es': {'num_samples': 3703, 'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'avg_query_len': 20.68, 'avg_positive_len': 980.96, 'avg_negative_len': 1023.22}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'avg_query_len': 10.15, 'avg_positive_len': 358.36, 'avg_negative_len': 388.08}}}} | +| [ESCIReranking](https://github.com/amazon-science/esci-data/) (Chandan K. Reddy, 2022) | ['eng', 'jpn', 'spa'] | Reranking | s2p | [Written] | {'test': 29285} | {'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'min_query_length': 1, 'avg_query_length': 19.69, 'max_query_length': 151, 'unique_query': 29269, 'min_positive_length': 1, 'avg_positive_length': 803.92, 'max_positive_length': 8640, 'unique_positive': 217712, 'min_negative_length': 1, 'avg_negative_length': 808.5, 'max_negative_length': 4441, 'unique_negative': 39551, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'min_query_length': 1, 'avg_query_length': 21.44, 'max_query_length': 151, 'unique_query': 21296, 'min_positive_length': 1, 'avg_positive_length': 868.37, 'max_positive_length': 5545, 'unique_positive': 150734, 'min_negative_length': 1, 'avg_negative_length': 864.45, 'max_negative_length': 3779, 'unique_negative': 23073}, 'es': {'num_samples': 3703, 
'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'min_query_length': 3, 'avg_query_length': 20.68, 'max_query_length': 59, 'unique_query': 3703, 'min_positive_length': 1, 'avg_positive_length': 980.96, 'max_positive_length': 8640, 'unique_positive': 32921, 'min_negative_length': 1, 'avg_negative_length': 1023.22, 'max_negative_length': 4441, 'unique_negative': 9285}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'min_query_length': 1, 'avg_query_length': 10.15, 'max_query_length': 60, 'unique_query': 4286, 'min_positive_length': 1, 'avg_positive_length': 358.36, 'max_positive_length': 3488, 'unique_positive': 35165, 'min_negative_length': 1, 'avg_negative_length': 388.08, 'max_negative_length': 3940, 'unique_negative': 7289}}}} | | [EcomRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | | [EightTagsClustering.v2](https://aclanthology.org/2020.lrec-1.207.pdf) | ['pol'] | Clustering | s2s | [Social, Written] | None | None | | [EmotionClassification](https://www.aclweb.org/anthology/D18-1404) | ['eng'] | Classification | s2s | [Social, Written] | None | None | @@ -231,13 +231,13 @@ The following tables give you an overview of the tasks in MTEB. 
| [HotpotQAHardNegatives](https://hotpotqa.github.io/) | ['eng'] | Retrieval | s2p | [Web, Written] | None | None | | [HunSum2AbstractiveRetrieval](https://arxiv.org/abs/2404.03555) (Botond Barta, 2024) | ['hun'] | Retrieval | s2p | [News, Written] | None | None | | [IFlyTek](https://www.cluebenchmarks.com/introduce.html) | ['cmn'] | Classification | s2s | | None | None | -| [IN22ConvBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Conv) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Social, Spoken, Fiction, Spoken] | {'test': 760518} | {'test': {'average_sentence1_length': 54.33, 'average_sentence2_length': 54.33, 'num_samples': 760518, 'number_of_characters': 82637104, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155988}, 'asm_Beng-brx_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162044}, 'asm_Beng-doi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167032}, 'asm_Beng-eng_Latn': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160716}, 'asm_Beng-gom_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156282}, 'asm_Beng-guj_Gujr': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158269}, 'asm_Beng-hin_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159964}, 'asm_Beng-kan_Knda': {'average_sentence1_length': 53.75, 'average_sentence2_length': 56.14, 
'num_samples': 1503, 'number_of_characters': 165177}, 'asm_Beng-kas_Arab': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164681}, 'asm_Beng-mai_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162408}, 'asm_Beng-mal_Mlym': {'average_sentence1_length': 53.75, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172838}, 'asm_Beng-mar_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162747}, 'asm_Beng-mni_Mtei': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157316}, 'asm_Beng-npi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160906}, 'asm_Beng-ory_Orya': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164223}, 'asm_Beng-pan_Guru': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160201}, 'asm_Beng-san_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158093}, 'asm_Beng-sat_Olck': {'average_sentence1_length': 53.75, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169379}, 'asm_Beng-snd_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162623}, 'asm_Beng-tam_Taml': {'average_sentence1_length': 53.75, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174866}, 'asm_Beng-tel_Telu': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157690}, 'asm_Beng-urd_Arab': {'average_sentence1_length': 
53.75, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161305}, 'ben_Beng-asm_Beng': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 155988}, 'ben_Beng-brx_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156448}, 'ben_Beng-doi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161436}, 'ben_Beng-eng_Latn': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155120}, 'ben_Beng-gom_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 150686}, 'ben_Beng-guj_Gujr': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152673}, 'ben_Beng-hin_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154368}, 'ben_Beng-kan_Knda': {'average_sentence1_length': 50.03, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159581}, 'ben_Beng-kas_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159085}, 'ben_Beng-mai_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 156812}, 'ben_Beng-mal_Mlym': {'average_sentence1_length': 50.03, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167242}, 'ben_Beng-mar_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157151}, 'ben_Beng-mni_Mtei': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 151720}, 
'ben_Beng-npi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155310}, 'ben_Beng-ory_Orya': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158627}, 'ben_Beng-pan_Guru': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154605}, 'ben_Beng-san_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152497}, 'ben_Beng-sat_Olck': {'average_sentence1_length': 50.03, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 163783}, 'ben_Beng-snd_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157027}, 'ben_Beng-tam_Taml': {'average_sentence1_length': 50.03, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169270}, 'ben_Beng-tel_Telu': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 152094}, 'ben_Beng-urd_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 155709}, 'brx_Deva-asm_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162044}, 'brx_Deva-ben_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156448}, 'brx_Deva-doi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167492}, 'brx_Deva-eng_Latn': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161176}, 'brx_Deva-gom_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.23, 
'num_samples': 1503, 'number_of_characters': 156742}, 'brx_Deva-guj_Gujr': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158729}, 'brx_Deva-hin_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160424}, 'brx_Deva-kan_Knda': {'average_sentence1_length': 54.06, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 165637}, 'brx_Deva-kas_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165141}, 'brx_Deva-mai_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162868}, 'brx_Deva-mal_Mlym': {'average_sentence1_length': 54.06, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173298}, 'brx_Deva-mar_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163207}, 'brx_Deva-mni_Mtei': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157776}, 'brx_Deva-npi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161366}, 'brx_Deva-ory_Orya': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164683}, 'brx_Deva-pan_Guru': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160661}, 'brx_Deva-san_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158553}, 'brx_Deva-sat_Olck': {'average_sentence1_length': 54.06, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169839}, 'brx_Deva-snd_Deva': {'average_sentence1_length': 
54.06, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163083}, 'brx_Deva-tam_Taml': {'average_sentence1_length': 54.06, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175326}, 'brx_Deva-tel_Telu': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158150}, 'brx_Deva-urd_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161765}, 'doi_Deva-asm_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 167032}, 'doi_Deva-ben_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 161436}, 'doi_Deva-brx_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 167492}, 'doi_Deva-eng_Latn': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 166164}, 'doi_Deva-gom_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 161730}, 'doi_Deva-guj_Gujr': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 163717}, 'doi_Deva-hin_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 165412}, 'doi_Deva-kan_Knda': {'average_sentence1_length': 57.38, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 170625}, 'doi_Deva-kas_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 170129}, 'doi_Deva-mai_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 167856}, 
'doi_Deva-mal_Mlym': {'average_sentence1_length': 57.38, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 178286}, 'doi_Deva-mar_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 168195}, 'doi_Deva-mni_Mtei': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 162764}, 'doi_Deva-npi_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 166354}, 'doi_Deva-ory_Orya': {'average_sentence1_length': 57.38, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 169671}, 'doi_Deva-pan_Guru': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 165649}, 'doi_Deva-san_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 163541}, 'doi_Deva-sat_Olck': {'average_sentence1_length': 57.38, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 174827}, 'doi_Deva-snd_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 168071}, 'doi_Deva-tam_Taml': {'average_sentence1_length': 57.38, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 180314}, 'doi_Deva-tel_Telu': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 163138}, 'doi_Deva-urd_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 166753}, 'eng_Latn-asm_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160716}, 'eng_Latn-ben_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.03, 
'num_samples': 1503, 'number_of_characters': 155120}, 'eng_Latn-brx_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161176}, 'eng_Latn-doi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166164}, 'eng_Latn-gom_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155414}, 'eng_Latn-guj_Gujr': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157401}, 'eng_Latn-hin_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159096}, 'eng_Latn-kan_Knda': {'average_sentence1_length': 53.18, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164309}, 'eng_Latn-kas_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163813}, 'eng_Latn-mai_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161540}, 'eng_Latn-mal_Mlym': {'average_sentence1_length': 53.18, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171970}, 'eng_Latn-mar_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161879}, 'eng_Latn-mni_Mtei': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156448}, 'eng_Latn-npi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160038}, 'eng_Latn-ory_Orya': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163355}, 'eng_Latn-pan_Guru': {'average_sentence1_length': 
53.18, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159333}, 'eng_Latn-san_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157225}, 'eng_Latn-sat_Olck': {'average_sentence1_length': 53.18, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168511}, 'eng_Latn-snd_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161755}, 'eng_Latn-tam_Taml': {'average_sentence1_length': 53.18, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173998}, 'eng_Latn-tel_Telu': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156822}, 'eng_Latn-urd_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160437}, 'gom_Deva-asm_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 156282}, 'gom_Deva-ben_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 150686}, 'gom_Deva-brx_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156742}, 'gom_Deva-doi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161730}, 'gom_Deva-eng_Latn': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155414}, 'gom_Deva-guj_Gujr': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152967}, 'gom_Deva-hin_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154662}, 
'gom_Deva-kan_Knda': {'average_sentence1_length': 50.23, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159875}, 'gom_Deva-kas_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159379}, 'gom_Deva-mai_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 157106}, 'gom_Deva-mal_Mlym': {'average_sentence1_length': 50.23, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167536}, 'gom_Deva-mar_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157445}, 'gom_Deva-mni_Mtei': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 152014}, 'gom_Deva-npi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155604}, 'gom_Deva-ory_Orya': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158921}, 'gom_Deva-pan_Guru': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154899}, 'gom_Deva-san_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152791}, 'gom_Deva-sat_Olck': {'average_sentence1_length': 50.23, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 164077}, 'gom_Deva-snd_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157321}, 'gom_Deva-tam_Taml': {'average_sentence1_length': 50.23, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169564}, 'gom_Deva-tel_Telu': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.16, 
'num_samples': 1503, 'number_of_characters': 152388}, 'gom_Deva-urd_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 156003}, 'guj_Gujr-asm_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158269}, 'guj_Gujr-ben_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152673}, 'guj_Gujr-brx_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158729}, 'guj_Gujr-doi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163717}, 'guj_Gujr-eng_Latn': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157401}, 'guj_Gujr-gom_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152967}, 'guj_Gujr-hin_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156649}, 'guj_Gujr-kan_Knda': {'average_sentence1_length': 51.55, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161862}, 'guj_Gujr-kas_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161366}, 'guj_Gujr-mai_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 159093}, 'guj_Gujr-mal_Mlym': {'average_sentence1_length': 51.55, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169523}, 'guj_Gujr-mar_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159432}, 'guj_Gujr-mni_Mtei': 
{'average_sentence1_length': 51.55, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 154001}, 'guj_Gujr-npi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157591}, 'guj_Gujr-ory_Orya': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160908}, 'guj_Gujr-pan_Guru': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156886}, 'guj_Gujr-san_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154778}, 'guj_Gujr-sat_Olck': {'average_sentence1_length': 51.55, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 166064}, 'guj_Gujr-snd_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159308}, 'guj_Gujr-tam_Taml': {'average_sentence1_length': 51.55, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171551}, 'guj_Gujr-tel_Telu': {'average_sentence1_length': 51.55, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154375}, 'guj_Gujr-urd_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157990}, 'hin_Deva-asm_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 159964}, 'hin_Deva-ben_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154368}, 'hin_Deva-brx_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160424}, 'hin_Deva-doi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 57.38, 'num_samples': 1503, 
'number_of_characters': 165412}, 'hin_Deva-eng_Latn': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159096}, 'hin_Deva-gom_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154662}, 'hin_Deva-guj_Gujr': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156649}, 'hin_Deva-kan_Knda': {'average_sentence1_length': 52.68, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163557}, 'hin_Deva-kas_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163061}, 'hin_Deva-mai_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 160788}, 'hin_Deva-mal_Mlym': {'average_sentence1_length': 52.68, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171218}, 'hin_Deva-mar_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161127}, 'hin_Deva-mni_Mtei': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155696}, 'hin_Deva-npi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159286}, 'hin_Deva-ory_Orya': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162603}, 'hin_Deva-pan_Guru': {'average_sentence1_length': 52.68, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 158581}, 'hin_Deva-san_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156473}, 'hin_Deva-sat_Olck': {'average_sentence1_length': 52.68, 
'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167759}, 'hin_Deva-snd_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161003}, 'hin_Deva-tam_Taml': {'average_sentence1_length': 52.68, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173246}, 'hin_Deva-tel_Telu': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156070}, 'hin_Deva-urd_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 159685}, 'kan_Knda-asm_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 165177}, 'kan_Knda-ben_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159581}, 'kan_Knda-brx_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165637}, 'kan_Knda-doi_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170625}, 'kan_Knda-eng_Latn': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 164309}, 'kan_Knda-gom_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159875}, 'kan_Knda-guj_Gujr': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161862}, 'kan_Knda-hin_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163557}, 'kan_Knda-kas_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 168274}, 
'kan_Knda-mai_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 166001}, 'kan_Knda-mal_Mlym': {'average_sentence1_length': 56.14, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 176431}, 'kan_Knda-mar_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 166340}, 'kan_Knda-mni_Mtei': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160909}, 'kan_Knda-npi_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164499}, 'kan_Knda-ory_Orya': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167816}, 'kan_Knda-pan_Guru': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163794}, 'kan_Knda-san_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161686}, 'kan_Knda-sat_Olck': {'average_sentence1_length': 56.14, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172972}, 'kan_Knda-snd_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 166216}, 'kan_Knda-tam_Taml': {'average_sentence1_length': 56.14, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 178459}, 'kan_Knda-tel_Telu': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 161283}, 'kan_Knda-urd_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164898}, 'kas_Arab-asm_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.75, 
'num_samples': 1503, 'number_of_characters': 164681}, 'kas_Arab-ben_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159085}, 'kas_Arab-brx_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165141}, 'kas_Arab-doi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170129}, 'kas_Arab-eng_Latn': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 163813}, 'kas_Arab-gom_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159379}, 'kas_Arab-guj_Gujr': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161366}, 'kas_Arab-hin_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163061}, 'kas_Arab-kan_Knda': {'average_sentence1_length': 55.81, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 168274}, 'kas_Arab-mai_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165505}, 'kas_Arab-mal_Mlym': {'average_sentence1_length': 55.81, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175935}, 'kas_Arab-mar_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165844}, 'kas_Arab-mni_Mtei': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160413}, 'kas_Arab-npi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164003}, 'kas_Arab-ory_Orya': {'average_sentence1_length': 
55.81, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167320}, 'kas_Arab-pan_Guru': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163298}, 'kas_Arab-san_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161190}, 'kas_Arab-sat_Olck': {'average_sentence1_length': 55.81, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172476}, 'kas_Arab-snd_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 165720}, 'kas_Arab-tam_Taml': {'average_sentence1_length': 55.81, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177963}, 'kas_Arab-tel_Telu': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160787}, 'kas_Arab-urd_Arab': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164402}, 'mai_Deva-asm_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162408}, 'mai_Deva-ben_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156812}, 'mai_Deva-brx_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162868}, 'mai_Deva-doi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167856}, 'mai_Deva-eng_Latn': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161540}, 'mai_Deva-gom_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157106}, 
'mai_Deva-guj_Gujr': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159093}, 'mai_Deva-hin_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160788}, 'mai_Deva-kan_Knda': {'average_sentence1_length': 54.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166001}, 'mai_Deva-kas_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165505}, 'mai_Deva-mal_Mlym': {'average_sentence1_length': 54.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173662}, 'mai_Deva-mar_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163571}, 'mai_Deva-mni_Mtei': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158140}, 'mai_Deva-npi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'mai_Deva-ory_Orya': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165047}, 'mai_Deva-pan_Guru': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161025}, 'mai_Deva-san_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158917}, 'mai_Deva-sat_Olck': {'average_sentence1_length': 54.3, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170203}, 'mai_Deva-snd_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163447}, 'mai_Deva-tam_Taml': {'average_sentence1_length': 54.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 
'number_of_characters': 175690}, 'mai_Deva-tel_Telu': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158514}, 'mai_Deva-urd_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162129}, 'mal_Mlym-asm_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 172838}, 'mal_Mlym-ben_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 167242}, 'mal_Mlym-brx_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 173298}, 'mal_Mlym-doi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 178286}, 'mal_Mlym-eng_Latn': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 171970}, 'mal_Mlym-gom_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 167536}, 'mal_Mlym-guj_Gujr': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 169523}, 'mal_Mlym-hin_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 171218}, 'mal_Mlym-kan_Knda': {'average_sentence1_length': 61.24, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 176431}, 'mal_Mlym-kas_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 175935}, 'mal_Mlym-mai_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 173662}, 'mal_Mlym-mar_Deva': {'average_sentence1_length': 61.24, 
'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 174001}, 'mal_Mlym-mni_Mtei': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 168570}, 'mal_Mlym-npi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 172160}, 'mal_Mlym-ory_Orya': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 175477}, 'mal_Mlym-pan_Guru': {'average_sentence1_length': 61.24, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 171455}, 'mal_Mlym-san_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 169347}, 'mal_Mlym-sat_Olck': {'average_sentence1_length': 61.24, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 180633}, 'mal_Mlym-snd_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 173877}, 'mal_Mlym-tam_Taml': {'average_sentence1_length': 61.24, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 186120}, 'mal_Mlym-tel_Telu': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 168944}, 'mal_Mlym-urd_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 172559}, 'mar_Deva-asm_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162747}, 'mar_Deva-ben_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157151}, 'mar_Deva-brx_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 163207}, 
'mar_Deva-doi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168195}, 'mar_Deva-eng_Latn': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161879}, 'mar_Deva-gom_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157445}, 'mar_Deva-guj_Gujr': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159432}, 'mar_Deva-hin_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161127}, 'mar_Deva-kan_Knda': {'average_sentence1_length': 54.53, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166340}, 'mar_Deva-kas_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165844}, 'mar_Deva-mai_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163571}, 'mar_Deva-mal_Mlym': {'average_sentence1_length': 54.53, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 174001}, 'mar_Deva-mni_Mtei': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158479}, 'mar_Deva-npi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 162069}, 'mar_Deva-ory_Orya': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165386}, 'mar_Deva-pan_Guru': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161364}, 'mar_Deva-san_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.43, 
'num_samples': 1503, 'number_of_characters': 159256}, 'mar_Deva-sat_Olck': {'average_sentence1_length': 54.53, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170542}, 'mar_Deva-snd_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163786}, 'mar_Deva-tam_Taml': {'average_sentence1_length': 54.53, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 176029}, 'mar_Deva-tel_Telu': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158853}, 'mar_Deva-urd_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162468}, 'mni_Mtei-asm_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157316}, 'mni_Mtei-ben_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 151720}, 'mni_Mtei-brx_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 157776}, 'mni_Mtei-doi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 162764}, 'mni_Mtei-eng_Latn': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156448}, 'mni_Mtei-gom_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152014}, 'mni_Mtei-guj_Gujr': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154001}, 'mni_Mtei-hin_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 155696}, 'mni_Mtei-kan_Knda': 
{'average_sentence1_length': 50.91, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 160909}, 'mni_Mtei-kas_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160413}, 'mni_Mtei-mai_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158140}, 'mni_Mtei-mal_Mlym': {'average_sentence1_length': 50.91, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168570}, 'mni_Mtei-mar_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158479}, 'mni_Mtei-npi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 156638}, 'mni_Mtei-ory_Orya': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 159955}, 'mni_Mtei-pan_Guru': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 155933}, 'mni_Mtei-san_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 153825}, 'mni_Mtei-sat_Olck': {'average_sentence1_length': 50.91, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165111}, 'mni_Mtei-snd_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158355}, 'mni_Mtei-tam_Taml': {'average_sentence1_length': 50.91, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170598}, 'mni_Mtei-tel_Telu': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 153422}, 'mni_Mtei-urd_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.57, 'num_samples': 1503, 
'number_of_characters': 157037}, 'npi_Deva-asm_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160906}, 'npi_Deva-ben_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155310}, 'npi_Deva-brx_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161366}, 'npi_Deva-doi_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166354}, 'npi_Deva-eng_Latn': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160038}, 'npi_Deva-gom_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155604}, 'npi_Deva-guj_Gujr': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157591}, 'npi_Deva-hin_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159286}, 'npi_Deva-kan_Knda': {'average_sentence1_length': 53.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164499}, 'npi_Deva-kas_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164003}, 'npi_Deva-mai_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'npi_Deva-mal_Mlym': {'average_sentence1_length': 53.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172160}, 'npi_Deva-mar_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162069}, 'npi_Deva-mni_Mtei': {'average_sentence1_length': 53.3, 
'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156638}, 'npi_Deva-ory_Orya': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163545}, 'npi_Deva-pan_Guru': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159523}, 'npi_Deva-san_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157415}, 'npi_Deva-sat_Olck': {'average_sentence1_length': 53.3, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168701}, 'npi_Deva-snd_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161945}, 'npi_Deva-tam_Taml': {'average_sentence1_length': 53.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174188}, 'npi_Deva-tel_Telu': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157012}, 'npi_Deva-urd_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160627}, 'ory_Orya-asm_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 164223}, 'ory_Orya-ben_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 158627}, 'ory_Orya-brx_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 164683}, 'ory_Orya-doi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 169671}, 'ory_Orya-eng_Latn': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 163355}, 'ory_Orya-gom_Deva': 
{'average_sentence1_length': 55.51, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 158921}, 'ory_Orya-guj_Gujr': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 160908}, 'ory_Orya-hin_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 162603}, 'ory_Orya-kan_Knda': {'average_sentence1_length': 55.51, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 167816}, 'ory_Orya-kas_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 167320}, 'ory_Orya-mai_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165047}, 'ory_Orya-mal_Mlym': {'average_sentence1_length': 55.51, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175477}, 'ory_Orya-mar_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165386}, 'ory_Orya-mni_Mtei': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 159955}, 'ory_Orya-npi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 163545}, 'ory_Orya-pan_Guru': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 162840}, 'ory_Orya-san_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 160732}, 'ory_Orya-sat_Olck': {'average_sentence1_length': 55.51, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172018}, 'ory_Orya-snd_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.45, 'num_samples': 1503, 
'number_of_characters': 165262}, 'ory_Orya-tam_Taml': {'average_sentence1_length': 55.51, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177505}, 'ory_Orya-tel_Telu': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160329}, 'ory_Orya-urd_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 163944}, 'pan_Guru-asm_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160201}, 'pan_Guru-ben_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154605}, 'pan_Guru-brx_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160661}, 'pan_Guru-doi_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 165649}, 'pan_Guru-eng_Latn': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159333}, 'pan_Guru-gom_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154899}, 'pan_Guru-guj_Gujr': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156886}, 'pan_Guru-hin_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 158581}, 'pan_Guru-kan_Knda': {'average_sentence1_length': 52.83, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163794}, 'pan_Guru-kas_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163298}, 'pan_Guru-mai_Deva': {'average_sentence1_length': 52.83, 
'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161025}, 'pan_Guru-mal_Mlym': {'average_sentence1_length': 52.83, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171455}, 'pan_Guru-mar_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161364}, 'pan_Guru-mni_Mtei': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155933}, 'pan_Guru-npi_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159523}, 'pan_Guru-ory_Orya': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162840}, 'pan_Guru-san_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156710}, 'pan_Guru-sat_Olck': {'average_sentence1_length': 52.83, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167996}, 'pan_Guru-snd_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161240}, 'pan_Guru-tam_Taml': {'average_sentence1_length': 52.83, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173483}, 'pan_Guru-tel_Telu': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156307}, 'pan_Guru-urd_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 159922}, 'san_Deva-asm_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158093}, 'san_Deva-ben_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152497}, 
'san_Deva-brx_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158553}, 'san_Deva-doi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163541}, 'san_Deva-eng_Latn': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157225}, 'san_Deva-gom_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152791}, 'san_Deva-guj_Gujr': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154778}, 'san_Deva-hin_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156473}, 'san_Deva-kan_Knda': {'average_sentence1_length': 51.43, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161686}, 'san_Deva-kas_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161190}, 'san_Deva-mai_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158917}, 'san_Deva-mal_Mlym': {'average_sentence1_length': 51.43, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169347}, 'san_Deva-mar_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159256}, 'san_Deva-mni_Mtei': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153825}, 'san_Deva-npi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157415}, 'san_Deva-ory_Orya': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.51, 
'num_samples': 1503, 'number_of_characters': 160732}, 'san_Deva-pan_Guru': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156710}, 'san_Deva-sat_Olck': {'average_sentence1_length': 51.43, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165888}, 'san_Deva-snd_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159132}, 'san_Deva-tam_Taml': {'average_sentence1_length': 51.43, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171375}, 'san_Deva-tel_Telu': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154199}, 'san_Deva-urd_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157814}, 'sat_Olck-asm_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 169379}, 'sat_Olck-ben_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 163783}, 'sat_Olck-brx_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 169839}, 'sat_Olck-doi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 174827}, 'sat_Olck-eng_Latn': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 168511}, 'sat_Olck-gom_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 164077}, 'sat_Olck-guj_Gujr': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 166064}, 'sat_Olck-hin_Deva': 
{'average_sentence1_length': 58.94, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 167759}, 'sat_Olck-kan_Knda': {'average_sentence1_length': 58.94, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 172972}, 'sat_Olck-kas_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 172476}, 'sat_Olck-mai_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 170203}, 'sat_Olck-mal_Mlym': {'average_sentence1_length': 58.94, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 180633}, 'sat_Olck-mar_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 170542}, 'sat_Olck-mni_Mtei': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 165111}, 'sat_Olck-npi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 168701}, 'sat_Olck-ory_Orya': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 172018}, 'sat_Olck-pan_Guru': {'average_sentence1_length': 58.94, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 167996}, 'sat_Olck-san_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 165888}, 'sat_Olck-snd_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 170418}, 'sat_Olck-tam_Taml': {'average_sentence1_length': 58.94, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 182661}, 'sat_Olck-tel_Telu': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.16, 'num_samples': 1503, 
'number_of_characters': 165485}, 'sat_Olck-urd_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 169100}, 'snd_Deva-asm_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162623}, 'snd_Deva-ben_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157027}, 'snd_Deva-brx_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 163083}, 'snd_Deva-doi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168071}, 'snd_Deva-eng_Latn': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161755}, 'snd_Deva-gom_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157321}, 'snd_Deva-guj_Gujr': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159308}, 'snd_Deva-hin_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161003}, 'snd_Deva-kan_Knda': {'average_sentence1_length': 54.45, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166216}, 'snd_Deva-kas_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165720}, 'snd_Deva-mai_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163447}, 'snd_Deva-mal_Mlym': {'average_sentence1_length': 54.45, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173877}, 'snd_Deva-mar_Deva': {'average_sentence1_length': 54.45, 
'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163786}, 'snd_Deva-mni_Mtei': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158355}, 'snd_Deva-npi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161945}, 'snd_Deva-ory_Orya': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165262}, 'snd_Deva-pan_Guru': {'average_sentence1_length': 54.45, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161240}, 'snd_Deva-san_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 159132}, 'snd_Deva-sat_Olck': {'average_sentence1_length': 54.45, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170418}, 'snd_Deva-tam_Taml': {'average_sentence1_length': 54.45, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175905}, 'snd_Deva-tel_Telu': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158729}, 'snd_Deva-urd_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162344}, 'tam_Taml-asm_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 174866}, 'tam_Taml-ben_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 169270}, 'tam_Taml-brx_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 175326}, 'tam_Taml-doi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 180314}, 
'tam_Taml-eng_Latn': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 173998}, 'tam_Taml-gom_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 169564}, 'tam_Taml-guj_Gujr': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 171551}, 'tam_Taml-hin_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 173246}, 'tam_Taml-kan_Knda': {'average_sentence1_length': 62.59, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 178459}, 'tam_Taml-kas_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 177963}, 'tam_Taml-mai_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 175690}, 'tam_Taml-mal_Mlym': {'average_sentence1_length': 62.59, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 186120}, 'tam_Taml-mar_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 176029}, 'tam_Taml-mni_Mtei': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 170598}, 'tam_Taml-npi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 174188}, 'tam_Taml-ory_Orya': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 177505}, 'tam_Taml-pan_Guru': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 173483}, 'tam_Taml-san_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.43, 
'num_samples': 1503, 'number_of_characters': 171375}, 'tam_Taml-sat_Olck': {'average_sentence1_length': 62.59, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 182661}, 'tam_Taml-snd_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 175905}, 'tam_Taml-tel_Telu': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 170972}, 'tam_Taml-urd_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 174587}, 'tel_Telu-asm_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157690}, 'tel_Telu-ben_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152094}, 'tel_Telu-brx_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158150}, 'tel_Telu-doi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163138}, 'tel_Telu-eng_Latn': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156822}, 'tel_Telu-gom_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152388}, 'tel_Telu-guj_Gujr': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154375}, 'tel_Telu-hin_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156070}, 'tel_Telu-kan_Knda': {'average_sentence1_length': 51.16, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161283}, 'tel_Telu-kas_Arab': 
{'average_sentence1_length': 51.16, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160787}, 'tel_Telu-mai_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158514}, 'tel_Telu-mal_Mlym': {'average_sentence1_length': 51.16, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168944}, 'tel_Telu-mar_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158853}, 'tel_Telu-mni_Mtei': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153422}, 'tel_Telu-npi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157012}, 'tel_Telu-ory_Orya': {'average_sentence1_length': 51.16, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160329}, 'tel_Telu-pan_Guru': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156307}, 'tel_Telu-san_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154199}, 'tel_Telu-sat_Olck': {'average_sentence1_length': 51.16, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165485}, 'tel_Telu-snd_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158729}, 'tel_Telu-tam_Taml': {'average_sentence1_length': 51.16, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170972}, 'tel_Telu-urd_Arab': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157411}, 'urd_Arab-asm_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.75, 'num_samples': 1503, 
'number_of_characters': 161305}, 'urd_Arab-ben_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155709}, 'urd_Arab-brx_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161765}, 'urd_Arab-doi_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166753}, 'urd_Arab-eng_Latn': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160437}, 'urd_Arab-gom_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156003}, 'urd_Arab-guj_Gujr': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157990}, 'urd_Arab-hin_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159685}, 'urd_Arab-kan_Knda': {'average_sentence1_length': 53.57, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164898}, 'urd_Arab-kas_Arab': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164402}, 'urd_Arab-mai_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162129}, 'urd_Arab-mal_Mlym': {'average_sentence1_length': 53.57, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172559}, 'urd_Arab-mar_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162468}, 'urd_Arab-mni_Mtei': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157037}, 'urd_Arab-npi_Deva': {'average_sentence1_length': 53.57, 
'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160627}, 'urd_Arab-ory_Orya': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163944}, 'urd_Arab-pan_Guru': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159922}, 'urd_Arab-san_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157814}, 'urd_Arab-sat_Olck': {'average_sentence1_length': 53.57, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169100}, 'urd_Arab-snd_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162344}, 'urd_Arab-tam_Taml': {'average_sentence1_length': 53.57, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174587}, 'urd_Arab-tel_Telu': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157411}}}} | -| [IN22GenBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Gen) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, Legal, Government, News, Religious, Non-fiction, Written] | None | None | -| [IWSLT2017BitextMining](https://aclanthology.org/2017.iwslt-1.1/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] | BitextMining | s2s | [Non-fiction, Fiction, Written] | None | None | +| [IN22ConvBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Conv) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Social, Spoken, Fiction, 
Spoken] | {'test': 760518} | {'test': {'num_samples': 760518, 'number_of_characters': 82637104, 'unique_pairs': 759283, 'min_sentence1_length': 3, 'average_sentence1_length': 54.33, 'max_sentence1_length': 239, 'unique_sentence1': 34430, 'min_sentence2_length': 3, 'average_sentence2_length': 54.33, 'max_sentence2_length': 239, 'unique_sentence2': 34430, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'asm_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'asm_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'asm_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'asm_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 
'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'asm_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'asm_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'asm_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'asm_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'asm_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'asm_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'asm_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'asm_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'asm_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'asm_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'asm_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'asm_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'asm_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'asm_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'asm_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'asm_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'asm_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ben_Beng-asm_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ben_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ben_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ben_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ben_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ben_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ben_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 
'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ben_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ben_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ben_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ben_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ben_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ben_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ben_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ben_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'ben_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ben_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ben_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ben_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ben_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ben_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ben_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'brx_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'brx_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'brx_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 
'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'brx_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'brx_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'brx_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'brx_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'brx_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'brx_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'brx_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'brx_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'brx_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'brx_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'brx_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'brx_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'brx_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'brx_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'brx_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'brx_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'brx_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'brx_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'brx_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'doi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'doi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'doi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'doi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'doi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 
'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'doi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'doi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'doi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'doi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'doi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'doi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'doi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'doi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'doi_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'doi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'doi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'doi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'doi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'doi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'doi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'doi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'doi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'eng_Latn-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'eng_Latn-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'eng_Latn-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'eng_Latn-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'eng_Latn-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'eng_Latn-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'eng_Latn-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 
'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'eng_Latn-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'eng_Latn-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'eng_Latn-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'eng_Latn-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'eng_Latn-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'eng_Latn-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'eng_Latn-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'eng_Latn-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'eng_Latn-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'eng_Latn-san_Deva': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'eng_Latn-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'eng_Latn-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'eng_Latn-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'eng_Latn-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'eng_Latn-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'gom_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'gom_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'gom_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'gom_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'gom_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'gom_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'gom_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'gom_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'gom_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'gom_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'gom_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'gom_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'gom_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'gom_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'gom_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'gom_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'gom_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'gom_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'gom_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'gom_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'gom_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'gom_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'guj_Gujr-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'guj_Gujr-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'guj_Gujr-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'guj_Gujr-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'guj_Gujr-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'guj_Gujr-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'guj_Gujr-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'guj_Gujr-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'guj_Gujr-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'guj_Gujr-mai_Deva': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'guj_Gujr-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'guj_Gujr-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'guj_Gujr-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'guj_Gujr-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'guj_Gujr-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'guj_Gujr-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'guj_Gujr-san_Deva': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'guj_Gujr-sat_Olck': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'guj_Gujr-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'guj_Gujr-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'guj_Gujr-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'guj_Gujr-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'hin_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'hin_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'hin_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'hin_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'hin_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'hin_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'hin_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'hin_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'hin_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'hin_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'hin_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'hin_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'hin_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'hin_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'hin_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'hin_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'hin_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'hin_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'hin_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'hin_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'hin_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'hin_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kan_Knda-asm_Beng': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kan_Knda-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kan_Knda-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kan_Knda-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kan_Knda-eng_Latn': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kan_Knda-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kan_Knda-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kan_Knda-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kan_Knda-kas_Arab': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 
'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'kan_Knda-mai_Deva': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kan_Knda-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kan_Knda-mar_Deva': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kan_Knda-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kan_Knda-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kan_Knda-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kan_Knda-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kan_Knda-san_Deva': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kan_Knda-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kan_Knda-snd_Deva': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kan_Knda-tam_Taml': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kan_Knda-tel_Telu': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kan_Knda-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kas_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kas_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kas_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kas_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kas_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kas_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kas_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kas_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kas_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'kas_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kas_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 
'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kas_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kas_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kas_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kas_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kas_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kas_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kas_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kas_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kas_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kas_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kas_Arab-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mai_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mai_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mai_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mai_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mai_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mai_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mai_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mai_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mai_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mai_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mai_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mai_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mai_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 
'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mai_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mai_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mai_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mai_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mai_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mai_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mai_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mai_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mai_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mal_Mlym-asm_Beng': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mal_Mlym-ben_Beng': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mal_Mlym-brx_Deva': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mal_Mlym-doi_Deva': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mal_Mlym-eng_Latn': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mal_Mlym-gom_Deva': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mal_Mlym-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mal_Mlym-hin_Deva': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mal_Mlym-kan_Knda': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mal_Mlym-kas_Arab': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mal_Mlym-mai_Deva': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mal_Mlym-mar_Deva': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mal_Mlym-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mal_Mlym-npi_Deva': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mal_Mlym-ory_Orya': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mal_Mlym-pan_Guru': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mal_Mlym-san_Deva': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mal_Mlym-sat_Olck': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mal_Mlym-snd_Deva': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mal_Mlym-tam_Taml': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mal_Mlym-tel_Telu': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mal_Mlym-urd_Arab': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mar_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mar_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mar_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mar_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mar_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mar_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mar_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mar_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mar_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mar_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mar_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mar_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mar_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mar_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mar_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mar_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mar_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mar_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mar_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mar_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mar_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mar_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mni_Mtei-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mni_Mtei-ben_Beng': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mni_Mtei-brx_Deva': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mni_Mtei-doi_Deva': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mni_Mtei-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mni_Mtei-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mni_Mtei-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mni_Mtei-hin_Deva': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mni_Mtei-kan_Knda': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mni_Mtei-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mni_Mtei-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mni_Mtei-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mni_Mtei-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mni_Mtei-npi_Deva': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mni_Mtei-ory_Orya': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mni_Mtei-pan_Guru': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mni_Mtei-san_Deva': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mni_Mtei-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mni_Mtei-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mni_Mtei-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mni_Mtei-tel_Telu': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mni_Mtei-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'npi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'npi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'npi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'npi_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'npi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'npi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'npi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'npi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'npi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'npi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'npi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'npi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'npi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'npi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'npi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 
'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'npi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'npi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'npi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'npi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'npi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'npi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'npi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ory_Orya-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ory_Orya-ben_Beng': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'ory_Orya-brx_Deva': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ory_Orya-doi_Deva': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ory_Orya-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ory_Orya-gom_Deva': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ory_Orya-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ory_Orya-hin_Deva': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ory_Orya-kan_Knda': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ory_Orya-kas_Arab': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ory_Orya-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ory_Orya-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ory_Orya-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ory_Orya-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ory_Orya-npi_Deva': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ory_Orya-pan_Guru': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ory_Orya-san_Deva': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 
'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ory_Orya-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ory_Orya-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ory_Orya-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ory_Orya-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ory_Orya-urd_Arab': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'pan_Guru-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'pan_Guru-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'pan_Guru-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'pan_Guru-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'pan_Guru-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'pan_Guru-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'pan_Guru-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'pan_Guru-hin_Deva': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'pan_Guru-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'pan_Guru-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'pan_Guru-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'pan_Guru-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'pan_Guru-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'pan_Guru-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'pan_Guru-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'pan_Guru-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'pan_Guru-san_Deva': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'pan_Guru-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'pan_Guru-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 
'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'pan_Guru-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'pan_Guru-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'pan_Guru-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'san_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'san_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'san_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'san_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'san_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'san_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'san_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'san_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'san_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'san_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'san_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'san_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'san_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'san_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'san_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 
'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'san_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'san_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'san_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'san_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'san_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'san_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'san_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'sat_Olck-asm_Beng': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'sat_Olck-ben_Beng': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'sat_Olck-brx_Deva': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'sat_Olck-doi_Deva': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'sat_Olck-eng_Latn': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'sat_Olck-gom_Deva': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'sat_Olck-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'sat_Olck-hin_Deva': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'sat_Olck-kan_Knda': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'sat_Olck-kas_Arab': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'sat_Olck-mai_Deva': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'sat_Olck-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'sat_Olck-mar_Deva': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'sat_Olck-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'sat_Olck-npi_Deva': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'sat_Olck-ory_Orya': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'sat_Olck-pan_Guru': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 
'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'sat_Olck-san_Deva': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'sat_Olck-snd_Deva': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'sat_Olck-tam_Taml': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'sat_Olck-tel_Telu': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'sat_Olck-urd_Arab': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'snd_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'snd_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'snd_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'snd_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'snd_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'snd_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'snd_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'snd_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'snd_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'snd_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'snd_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'snd_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'snd_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'snd_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'snd_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'snd_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'snd_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'snd_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'snd_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 
'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'snd_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'snd_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'snd_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tam_Taml-asm_Beng': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tam_Taml-ben_Beng': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tam_Taml-brx_Deva': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 
'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tam_Taml-doi_Deva': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tam_Taml-eng_Latn': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tam_Taml-gom_Deva': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tam_Taml-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tam_Taml-hin_Deva': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tam_Taml-kan_Knda': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 
'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tam_Taml-kas_Arab': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tam_Taml-mai_Deva': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tam_Taml-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tam_Taml-mar_Deva': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tam_Taml-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tam_Taml-npi_Deva': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 
'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tam_Taml-ory_Orya': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tam_Taml-pan_Guru': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tam_Taml-san_Deva': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tam_Taml-sat_Olck': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tam_Taml-snd_Deva': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tam_Taml-tel_Telu': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 
'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'tam_Taml-urd_Arab': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tel_Telu-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tel_Telu-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tel_Telu-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tel_Telu-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tel_Telu-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 
'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tel_Telu-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tel_Telu-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tel_Telu-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tel_Telu-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tel_Telu-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tel_Telu-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 
'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tel_Telu-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tel_Telu-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tel_Telu-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tel_Telu-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tel_Telu-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tel_Telu-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 
'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tel_Telu-san_Deva': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tel_Telu-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tel_Telu-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tel_Telu-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'tel_Telu-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'urd_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 
'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'urd_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'urd_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'urd_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'urd_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'urd_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'urd_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 
'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'urd_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'urd_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'urd_Arab-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'urd_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'urd_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'urd_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 
'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'urd_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'urd_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'urd_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'urd_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'urd_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'urd_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 
'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'urd_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'urd_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'urd_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}}}} | +| [IN22GenBitextMining](https://huggingface.co/datasets/ai4bharat/IN22-Gen) (Jay Gala, 2023) | ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, Legal, Government, News, Religious, Non-fiction, Written] | {'test': 518144} | {'test': {'num_samples': 518144, 'number_of_characters': 162367876, 'unique_pairs': 518101, 'min_sentence1_length': 9, 'average_sentence1_length': 156.68, 'max_sentence1_length': 692, 'unique_sentence1': 23550, 'min_sentence2_length': 9, 'average_sentence2_length': 156.68, 'max_sentence2_length': 692, 'unique_sentence2': 23550, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 
'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'asm_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'asm_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'asm_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'asm_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'asm_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'asm_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 
'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'asm_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'asm_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'asm_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'asm_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'asm_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'asm_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 
'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'asm_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'asm_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'asm_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'asm_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'asm_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'asm_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 13, 
'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'asm_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'asm_Beng-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'asm_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ben_Beng-asm_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ben_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ben_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 
'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ben_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ben_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ben_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ben_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'ben_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ben_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 312468, 
'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ben_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ben_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ben_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ben_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'ben_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ben_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 
323930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'ben_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ben_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ben_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 316436, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ben_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ben_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ben_Beng-tel_Telu': {'num_samples': 1024, 
'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ben_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'brx_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'brx_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'brx_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'brx_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'brx_Deva-gom_Deva': 
{'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'brx_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'brx_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'brx_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 334755, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'brx_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'brx_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 
'brx_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'brx_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'brx_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'brx_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'brx_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'brx_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 
'unique_sentence2': 1024}, 'brx_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'brx_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'brx_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'brx_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'brx_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'brx_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 
'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'doi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'doi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'doi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'doi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'doi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'doi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'doi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'doi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'doi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'doi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'doi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'doi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 
'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'doi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'doi_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'doi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'doi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'doi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'doi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 
'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'doi_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'doi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'doi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'doi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'eng_Latn-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'eng_Latn-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 
'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'eng_Latn-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'eng_Latn-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'eng_Latn-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'eng_Latn-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'eng_Latn-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'eng_Latn-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 17, 
'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'eng_Latn-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'eng_Latn-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'eng_Latn-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'eng_Latn-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'eng_Latn-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'eng_Latn-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 
'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'eng_Latn-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'eng_Latn-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'eng_Latn-san_Deva': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'eng_Latn-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'eng_Latn-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'eng_Latn-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347526, 
'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'eng_Latn-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'eng_Latn-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'gom_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'gom_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'gom_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'gom_Deva-doi_Deva': {'num_samples': 1024, 
'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'gom_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'gom_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'gom_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'gom_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'gom_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'gom_Deva-mai_Deva': 
{'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'gom_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'gom_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'gom_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'gom_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'gom_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 
'gom_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'gom_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'gom_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'gom_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'gom_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'gom_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 
'unique_sentence2': 1024}, 'gom_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'guj_Gujr-asm_Beng': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'guj_Gujr-ben_Beng': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'guj_Gujr-brx_Deva': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'guj_Gujr-doi_Deva': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'guj_Gujr-eng_Latn': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 
'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'guj_Gujr-gom_Deva': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'guj_Gujr-hin_Deva': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'guj_Gujr-kan_Knda': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'guj_Gujr-kas_Arab': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'guj_Gujr-mai_Deva': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'guj_Gujr-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 
'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'guj_Gujr-mar_Deva': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'guj_Gujr-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'guj_Gujr-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'guj_Gujr-ory_Orya': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'guj_Gujr-pan_Guru': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'guj_Gujr-san_Deva': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 
'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'guj_Gujr-sat_Olck': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'guj_Gujr-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'guj_Gujr-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'guj_Gujr-tel_Telu': {'num_samples': 1024, 'number_of_characters': 307567, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'guj_Gujr-urd_Arab': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'hin_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 
'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'hin_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'hin_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'hin_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'hin_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'hin_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'hin_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 
'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'hin_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'hin_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'hin_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'hin_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342824, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'hin_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'hin_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 
'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'hin_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'hin_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'hin_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'hin_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'hin_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'hin_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 
'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'hin_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'hin_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'hin_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kan_Knda-asm_Beng': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kan_Knda-ben_Beng': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kan_Knda-brx_Deva': {'num_samples': 1024, 'number_of_characters': 334755, 
'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kan_Knda-doi_Deva': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'kan_Knda-eng_Latn': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kan_Knda-gom_Deva': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kan_Knda-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kan_Knda-hin_Deva': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kan_Knda-kas_Arab': {'num_samples': 1024, 
'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'kan_Knda-mai_Deva': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kan_Knda-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kan_Knda-mar_Deva': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kan_Knda-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kan_Knda-npi_Deva': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kan_Knda-ory_Orya': 
{'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kan_Knda-pan_Guru': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kan_Knda-san_Deva': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kan_Knda-sat_Olck': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kan_Knda-snd_Deva': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kan_Knda-tam_Taml': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 
'kan_Knda-tel_Telu': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kan_Knda-urd_Arab': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kas_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kas_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 312468, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kas_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kas_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 
'unique_sentence2': 1024}, 'kas_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kas_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kas_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kas_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kas_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'kas_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 
'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kas_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kas_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kas_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kas_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kas_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kas_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 
'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kas_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kas_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kas_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kas_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'kas_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kas_Arab-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 
'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mai_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mai_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mai_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mai_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mai_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mai_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 
'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mai_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mai_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mai_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mai_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mai_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mai_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 
'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mai_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mai_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mai_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mai_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mai_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mai_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 14, 
'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mai_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mai_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mai_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mai_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mal_Mlym-asm_Beng': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mal_Mlym-ben_Beng': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 
'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mal_Mlym-brx_Deva': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mal_Mlym-doi_Deva': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mal_Mlym-eng_Latn': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mal_Mlym-gom_Deva': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mal_Mlym-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mal_Mlym-hin_Deva': {'num_samples': 1024, 'number_of_characters': 342824, 
'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mal_Mlym-kan_Knda': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mal_Mlym-kas_Arab': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mal_Mlym-mai_Deva': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mal_Mlym-mar_Deva': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mal_Mlym-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mal_Mlym-npi_Deva': {'num_samples': 1024, 
'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mal_Mlym-ory_Orya': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mal_Mlym-pan_Guru': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mal_Mlym-san_Deva': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mal_Mlym-sat_Olck': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mal_Mlym-snd_Deva': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mal_Mlym-tam_Taml': 
{'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mal_Mlym-tel_Telu': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mal_Mlym-urd_Arab': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mar_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mar_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mar_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 
'mar_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mar_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mar_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mar_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mar_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mar_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 
'unique_sentence2': 1024}, 'mar_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mar_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mar_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mar_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mar_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mar_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 
'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mar_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mar_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mar_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mar_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mar_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mar_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mar_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mni_Mtei-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mni_Mtei-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mni_Mtei-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mni_Mtei-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mni_Mtei-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 
'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mni_Mtei-gom_Deva': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mni_Mtei-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mni_Mtei-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mni_Mtei-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mni_Mtei-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mni_Mtei-mai_Deva': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 
'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mni_Mtei-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mni_Mtei-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mni_Mtei-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mni_Mtei-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mni_Mtei-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mni_Mtei-san_Deva': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 
'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mni_Mtei-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mni_Mtei-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mni_Mtei-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mni_Mtei-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mni_Mtei-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'npi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 10, 
'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'npi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'npi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'npi_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'npi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'npi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'npi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 
'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'npi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'npi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'npi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'npi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'npi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'npi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 
'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'npi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'npi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'npi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'npi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'npi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'npi_Deva-snd_Deva': {'num_samples': 1024, 
'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'npi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'npi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'npi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ory_Orya-asm_Beng': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ory_Orya-ben_Beng': {'num_samples': 1024, 'number_of_characters': 323930, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'ory_Orya-brx_Deva': 
{'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ory_Orya-doi_Deva': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ory_Orya-eng_Latn': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ory_Orya-gom_Deva': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ory_Orya-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ory_Orya-hin_Deva': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 
'ory_Orya-kan_Knda': {'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ory_Orya-kas_Arab': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ory_Orya-mai_Deva': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ory_Orya-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ory_Orya-mar_Deva': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ory_Orya-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 
'unique_sentence2': 1024}, 'ory_Orya-npi_Deva': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ory_Orya-pan_Guru': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ory_Orya-san_Deva': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ory_Orya-sat_Olck': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ory_Orya-snd_Deva': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ory_Orya-tam_Taml': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 
'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ory_Orya-tel_Telu': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ory_Orya-urd_Arab': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'pan_Guru-asm_Beng': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'pan_Guru-ben_Beng': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'pan_Guru-brx_Deva': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'pan_Guru-doi_Deva': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 
'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'pan_Guru-eng_Latn': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'pan_Guru-gom_Deva': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'pan_Guru-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'pan_Guru-hin_Deva': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'pan_Guru-kan_Knda': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'pan_Guru-kas_Arab': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 
'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'pan_Guru-mai_Deva': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'pan_Guru-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'pan_Guru-mar_Deva': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'pan_Guru-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'pan_Guru-npi_Deva': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'pan_Guru-ory_Orya': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 
'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'pan_Guru-san_Deva': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'pan_Guru-sat_Olck': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'pan_Guru-snd_Deva': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'pan_Guru-tam_Taml': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'pan_Guru-tel_Telu': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'pan_Guru-urd_Arab': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 
'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'san_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'san_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'san_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'san_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'san_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'san_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 
153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'san_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'san_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'san_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'san_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'san_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'san_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 9, 
'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'san_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'san_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'san_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'san_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'san_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'san_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 
'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'san_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'san_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'san_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'san_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'sat_Olck-asm_Beng': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'sat_Olck-ben_Beng': {'num_samples': 1024, 'number_of_characters': 316436, 
'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'sat_Olck-brx_Deva': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'sat_Olck-doi_Deva': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'sat_Olck-eng_Latn': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'sat_Olck-gom_Deva': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'sat_Olck-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'sat_Olck-hin_Deva': {'num_samples': 1024, 
'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'sat_Olck-kan_Knda': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'sat_Olck-kas_Arab': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'sat_Olck-mai_Deva': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'sat_Olck-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'sat_Olck-mar_Deva': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'sat_Olck-mni_Mtei': 
{'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'sat_Olck-npi_Deva': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'sat_Olck-ory_Orya': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'sat_Olck-pan_Guru': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'sat_Olck-san_Deva': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'sat_Olck-snd_Deva': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 
'sat_Olck-tam_Taml': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'sat_Olck-tel_Telu': {'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'sat_Olck-urd_Arab': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'snd_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'snd_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'snd_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 
'unique_sentence2': 1024}, 'snd_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'snd_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'snd_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'snd_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'snd_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'snd_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 
'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'snd_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'snd_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'snd_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'snd_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'snd_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'snd_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 
'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'snd_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'snd_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'snd_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'snd_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'snd_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'snd_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 
'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'snd_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tam_Taml-asm_Beng': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tam_Taml-ben_Beng': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tam_Taml-brx_Deva': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tam_Taml-doi_Deva': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tam_Taml-eng_Latn': {'num_samples': 1024, 'number_of_characters': 347526, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 
'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tam_Taml-gom_Deva': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tam_Taml-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tam_Taml-hin_Deva': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tam_Taml-kan_Knda': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tam_Taml-kas_Arab': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tam_Taml-mai_Deva': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 
'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tam_Taml-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tam_Taml-mar_Deva': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tam_Taml-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tam_Taml-npi_Deva': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tam_Taml-ory_Orya': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tam_Taml-pan_Guru': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 32, 
'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tam_Taml-san_Deva': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tam_Taml-sat_Olck': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tam_Taml-snd_Deva': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tam_Taml-tel_Telu': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'tam_Taml-urd_Arab': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tel_Telu-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 
'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tel_Telu-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tel_Telu-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tel_Telu-doi_Deva': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tel_Telu-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tel_Telu-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tel_Telu-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307567, 
'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tel_Telu-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tel_Telu-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tel_Telu-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tel_Telu-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tel_Telu-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tel_Telu-mar_Deva': {'num_samples': 1024, 
'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tel_Telu-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tel_Telu-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tel_Telu-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tel_Telu-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tel_Telu-san_Deva': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tel_Telu-sat_Olck': 
{'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tel_Telu-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tel_Telu-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'tel_Telu-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'urd_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'urd_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 
'urd_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'urd_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'urd_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'urd_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'urd_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'urd_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 
'unique_sentence2': 1024}, 'urd_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'urd_Arab-kas_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'urd_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'urd_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'urd_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'urd_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 
'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'urd_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'urd_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'urd_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'urd_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'urd_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'urd_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 
'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'urd_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'urd_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}}}} | +| [IWSLT2017BitextMining](https://aclanthology.org/2017.iwslt-1.1/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] | BitextMining | s2s | [Non-fiction, Fiction, Written] | {'validation': 21938} | {'validation': {'num_samples': 21938, 'number_of_characters': 4256244, 'unique_pairs': 21840, 'min_sentence1_length': 2, 'average_sentence1_length': 97.01, 'max_sentence1_length': 521, 'unique_sentence1': 11563, 'min_sentence2_length': 2, 'average_sentence2_length': 97.01, 'max_sentence2_length': 521, 'unique_sentence2': 11563, 'hf_subset_descriptive_stats': {'ar-en': {'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 4, 'average_sentence1_length': 85.49, 'max_sentence1_length': 369, 'unique_sentence1': 887, 'min_sentence2_length': 10, 'average_sentence2_length': 108.77, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'de-en': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 119.03, 'max_sentence1_length': 521, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.83, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'en-ar': 
{'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 10, 'average_sentence1_length': 108.77, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 4, 'average_sentence2_length': 85.49, 'max_sentence2_length': 369, 'unique_sentence2': 887}, 'en-de': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.83, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 6, 'average_sentence2_length': 119.03, 'max_sentence2_length': 521, 'unique_sentence2': 881}, 'en-fr': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.41, 'max_sentence1_length': 462, 'unique_sentence1': 883, 'min_sentence2_length': 6, 'average_sentence2_length': 113.63, 'max_sentence2_length': 493, 'unique_sentence2': 881}, 'en-it': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 10, 'average_sentence1_length': 103.0, 'max_sentence1_length': 433, 'unique_sentence1': 922, 'min_sentence2_length': 7, 'average_sentence2_length': 103.46, 'max_sentence2_length': 444, 'unique_sentence2': 918}, 'en-ja': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 10, 'average_sentence1_length': 109.81, 'max_sentence1_length': 462, 'unique_sentence1': 864, 'min_sentence2_length': 5, 'average_sentence2_length': 42.59, 'max_sentence2_length': 225, 'unique_sentence2': 866}, 'en-ko': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 10, 'average_sentence1_length': 107.74, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 3, 'average_sentence2_length': 54.56, 'max_sentence2_length': 250, 'unique_sentence2': 872}, 'en-nl': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 
'min_sentence1_length': 10, 'average_sentence1_length': 95.27, 'max_sentence1_length': 433, 'unique_sentence1': 996, 'min_sentence2_length': 4, 'average_sentence2_length': 93.8, 'max_sentence2_length': 477, 'unique_sentence2': 1000}, 'en-ro': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 10, 'average_sentence1_length': 104.72, 'max_sentence1_length': 433, 'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.67, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'en-zh': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 10, 'average_sentence1_length': 109.37, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 2, 'average_sentence2_length': 39.81, 'max_sentence2_length': 230, 'unique_sentence2': 867}, 'fr-en': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 113.63, 'max_sentence1_length': 493, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.41, 'max_sentence2_length': 462, 'unique_sentence2': 883}, 'it-en': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 7, 'average_sentence1_length': 103.46, 'max_sentence1_length': 444, 'unique_sentence1': 918, 'min_sentence2_length': 10, 'average_sentence2_length': 103.0, 'max_sentence2_length': 433, 'unique_sentence2': 922}, 'it-nl': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.64, 'max_sentence1_length': 459, 'unique_sentence1': 994, 'min_sentence2_length': 7, 'average_sentence2_length': 94.03, 'max_sentence2_length': 505, 'unique_sentence2': 998}, 'it-ro': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 103.91, 'max_sentence1_length': 435, 
'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.62, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'ja-en': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 5, 'average_sentence1_length': 42.59, 'max_sentence1_length': 225, 'unique_sentence1': 866, 'min_sentence2_length': 10, 'average_sentence2_length': 109.81, 'max_sentence2_length': 462, 'unique_sentence2': 864}, 'ko-en': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 3, 'average_sentence1_length': 54.56, 'max_sentence1_length': 250, 'unique_sentence1': 872, 'min_sentence2_length': 10, 'average_sentence2_length': 107.74, 'max_sentence2_length': 462, 'unique_sentence2': 872}, 'nl-en': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 'min_sentence1_length': 4, 'average_sentence1_length': 93.8, 'max_sentence1_length': 477, 'unique_sentence1': 1000, 'min_sentence2_length': 10, 'average_sentence2_length': 95.27, 'max_sentence2_length': 433, 'unique_sentence2': 996}, 'nl-it': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.03, 'max_sentence1_length': 505, 'unique_sentence1': 998, 'min_sentence2_length': 7, 'average_sentence2_length': 94.64, 'max_sentence2_length': 459, 'unique_sentence2': 994}, 'nl-ro': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 102.02, 'max_sentence1_length': 478, 'unique_sentence1': 909, 'min_sentence2_length': 9, 'average_sentence2_length': 107.59, 'max_sentence2_length': 515, 'unique_sentence2': 909}, 'ro-en': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 9, 'average_sentence1_length': 107.67, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 10, 'average_sentence2_length': 104.72, 
'max_sentence2_length': 433, 'unique_sentence2': 907}, 'ro-it': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.62, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 7, 'average_sentence2_length': 103.91, 'max_sentence2_length': 435, 'unique_sentence2': 907}, 'ro-nl': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.59, 'max_sentence1_length': 515, 'unique_sentence1': 909, 'min_sentence2_length': 7, 'average_sentence2_length': 102.02, 'max_sentence2_length': 478, 'unique_sentence2': 909}, 'zh-en': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 2, 'average_sentence1_length': 39.81, 'max_sentence1_length': 230, 'unique_sentence1': 867, 'min_sentence2_length': 10, 'average_sentence2_length': 109.37, 'max_sentence2_length': 462, 'unique_sentence2': 872}}}} | | [ImdbClassification](http://www.aclweb.org/anthology/P11-1015) | ['eng'] | Classification | p2p | [Reviews, Written] | None | None | | [InappropriatenessClassification](https://aclanthology.org/2021.bsnlp-1.4) | ['rus'] | Classification | s2s | [Web, Social, Written] | None | None | | [IndicCrosslingualSTS](https://huggingface.co/datasets/jaygala24/indic_sts) (Ramesh et al., 2022) | ['asm', 'ben', 'eng', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] | STS | s2s | [News, Non-fiction, Web, Spoken, Government, Written, Spoken] | None | None | -| [IndicGenBenchFloresBitextMining](https://github.com/google-research-datasets/indic-gen-bench/) (Harman Singh, 2024) | ['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, News, Written] | None | None | +| 
[IndicGenBenchFloresBitextMining](https://github.com/google-research-datasets/indic-gen-bench/) (Harman Singh, 2024) | ['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] | BitextMining | s2s | [Web, News, Written] | {'validation': 57826, 'test': 58696} | {'validation': {'num_samples': 57826, 'number_of_characters': 14600950, 'unique_pairs': 57826, 'min_sentence1_length': 24, 'average_sentence1_length': 126.25, 'max_sentence1_length': 368, 'unique_sentence1': 29903, 'min_sentence2_length': 24, 'average_sentence2_length': 126.24, 'max_sentence2_length': 368, 'unique_sentence2': 29903, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 123.65, 'max_sentence1_length': 320, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ben': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 123.65, 'max_sentence2_length': 320, 'unique_sentence2': 997}, 'guj-eng': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 120.64, 'max_sentence1_length': 368, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-guj': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 
'average_sentence2_length': 120.64, 'max_sentence2_length': 368, 'unique_sentence2': 997}, 'hin-eng': {'num_samples': 997, 'number_of_characters': 250573, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 125.76, 'max_sentence1_length': 355, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hin': {'num_samples': 997, 'number_of_characters': 250564, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 125.75, 'max_sentence2_length': 355, 'unique_sentence2': 997}, 'kan-eng': {'num_samples': 997, 'number_of_characters': 257131, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 132.33, 'max_sentence1_length': 331, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-kan': {'num_samples': 997, 'number_of_characters': 256986, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 132.19, 'max_sentence2_length': 331, 'unique_sentence2': 997}, 'mal-eng': {'num_samples': 997, 'number_of_characters': 267295, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 142.53, 'max_sentence1_length': 360, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mal': {'num_samples': 997, 'number_of_characters': 267296, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 142.53, 'max_sentence2_length': 360, 
'unique_sentence2': 997}, 'mar-eng': {'num_samples': 997, 'number_of_characters': 251107, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.29, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mar': {'num_samples': 997, 'number_of_characters': 250897, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.08, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'tam-eng': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 146.57, 'max_sentence1_length': 358, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tam': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 146.57, 'max_sentence2_length': 358, 'unique_sentence2': 997}, 'tel-eng': {'num_samples': 997, 'number_of_characters': 252385, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 127.57, 'max_sentence1_length': 317, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tel': {'num_samples': 997, 'number_of_characters': 252380, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 127.57, 'max_sentence2_length': 317, 'unique_sentence2': 997}, 'urd-eng': {'num_samples': 997, 
'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 125.01, 'max_sentence1_length': 295, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-urd': {'num_samples': 997, 'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 125.01, 'max_sentence2_length': 295, 'unique_sentence2': 996}, 'asm-eng': {'num_samples': 997, 'number_of_characters': 246220, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 121.39, 'max_sentence1_length': 314, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-asm': {'num_samples': 997, 'number_of_characters': 246224, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 121.39, 'max_sentence2_length': 314, 'unique_sentence2': 997}, 'bho-eng': {'num_samples': 997, 'number_of_characters': 246895, 'unique_pairs': 997, 'min_sentence1_length': 25, 'average_sentence1_length': 122.07, 'max_sentence1_length': 326, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bho': {'num_samples': 997, 'number_of_characters': 246919, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 25, 'average_sentence2_length': 122.09, 'max_sentence2_length': 326, 'unique_sentence2': 997}, 'nep-eng': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 
'min_sentence1_length': 24, 'average_sentence1_length': 121.15, 'max_sentence1_length': 307, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-nep': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 24, 'average_sentence2_length': 121.15, 'max_sentence2_length': 307, 'unique_sentence2': 997}, 'ory-eng': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 129.4, 'max_sentence1_length': 308, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ory': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 129.4, 'max_sentence2_length': 308, 'unique_sentence2': 997}, 'pan-eng': {'num_samples': 997, 'number_of_characters': 251598, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.78, 'max_sentence1_length': 309, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pan': {'num_samples': 997, 'number_of_characters': 251597, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.78, 'max_sentence2_length': 309, 'unique_sentence2': 997}, 'pus-eng': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 122.62, 
'max_sentence1_length': 300, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pus': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 122.62, 'max_sentence2_length': 300, 'unique_sentence2': 997}, 'san-eng': {'num_samples': 997, 'number_of_characters': 249042, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 124.22, 'max_sentence1_length': 311, 'unique_sentence1': 994, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-san': {'num_samples': 997, 'number_of_characters': 248877, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 124.06, 'max_sentence2_length': 311, 'unique_sentence2': 994}, 'awa-eng': {'num_samples': 997, 'number_of_characters': 247944, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 123.12, 'max_sentence1_length': 329, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-awa': {'num_samples': 997, 'number_of_characters': 247884, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 123.06, 'max_sentence2_length': 329, 'unique_sentence2': 997}, 'bgc-eng': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 27, 'average_sentence1_length': 121.1, 'max_sentence1_length': 303, 'unique_sentence1': 997, 
'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bgc': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 27, 'average_sentence2_length': 121.1, 'max_sentence2_length': 303, 'unique_sentence2': 997}, 'bod-eng': {'num_samples': 997, 'number_of_characters': 266515, 'unique_pairs': 997, 'min_sentence1_length': 26, 'average_sentence1_length': 141.75, 'max_sentence1_length': 355, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bod': {'num_samples': 997, 'number_of_characters': 266495, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 26, 'average_sentence2_length': 141.73, 'max_sentence2_length': 355, 'unique_sentence2': 996}, 'boy-eng': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 135.39, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-boy': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 135.39, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'gbm-eng': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 122.18, 'max_sentence1_length': 344, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 
'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gbm': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 122.18, 'max_sentence2_length': 344, 'unique_sentence2': 997}, 'gom-eng': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 119.72, 'max_sentence1_length': 306, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gom': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 119.72, 'max_sentence2_length': 306, 'unique_sentence2': 997}, 'hne-eng': {'num_samples': 997, 'number_of_characters': 246416, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 121.59, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hne': {'num_samples': 997, 'number_of_characters': 246405, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 121.58, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'raj-eng': {'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 124.72, 'max_sentence1_length': 313, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-raj': 
{'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 124.72, 'max_sentence2_length': 313, 'unique_sentence2': 997}, 'mai-eng': {'num_samples': 997, 'number_of_characters': 247991, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 123.17, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mai': {'num_samples': 997, 'number_of_characters': 247994, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 123.17, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mni-eng': {'num_samples': 997, 'number_of_characters': 254308, 'unique_pairs': 997, 'min_sentence1_length': 39, 'average_sentence1_length': 129.5, 'max_sentence1_length': 310, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mni': {'num_samples': 997, 'number_of_characters': 254312, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 39, 'average_sentence2_length': 129.51, 'max_sentence2_length': 310, 'unique_sentence2': 997}, 'mup-eng': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 123.66, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mup': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 
997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 123.66, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mwr-eng': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 123.82, 'max_sentence1_length': 324, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mwr': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 123.82, 'max_sentence2_length': 324, 'unique_sentence2': 997}, 'sat-eng': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 133.49, 'max_sentence1_length': 333, 'unique_sentence1': 995, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-sat': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 133.49, 'max_sentence2_length': 333, 'unique_sentence2': 995}}}, 'test': {'num_samples': 58696, 'number_of_characters': 15359416, 'unique_pairs': 58690, 'min_sentence1_length': 33, 'average_sentence1_length': 130.84, 'max_sentence1_length': 431, 'unique_sentence1': 30351, 'min_sentence2_length': 33, 'average_sentence2_length': 130.83, 'max_sentence2_length': 431, 'unique_sentence2': 30351, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 
'min_sentence1_length': 38, 'average_sentence1_length': 127.51, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ben': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.51, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'guj-eng': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.93, 'max_sentence1_length': 349, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-guj': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.93, 'max_sentence2_length': 349, 'unique_sentence2': 1012}, 'hin-eng': {'num_samples': 1012, 'number_of_characters': 263040, 'unique_pairs': 1012, 'min_sentence1_length': 41, 'average_sentence1_length': 129.52, 'max_sentence1_length': 381, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hin': {'num_samples': 1012, 'number_of_characters': 263029, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 41, 'average_sentence2_length': 129.51, 'max_sentence2_length': 381, 'unique_sentence2': 1012}, 'kan-eng': {'num_samples': 1012, 'number_of_characters': 270091, 'unique_pairs': 1012, 'min_sentence1_length': 43, 
'average_sentence1_length': 136.49, 'max_sentence1_length': 388, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-kan': {'num_samples': 1012, 'number_of_characters': 270021, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 136.42, 'max_sentence2_length': 388, 'unique_sentence2': 1012}, 'mal-eng': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 147.57, 'max_sentence1_length': 376, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mal': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 147.57, 'max_sentence2_length': 376, 'unique_sentence2': 1012}, 'mar-eng': {'num_samples': 1012, 'number_of_characters': 265212, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 131.67, 'max_sentence1_length': 356, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mar': {'num_samples': 1012, 'number_of_characters': 265023, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 131.48, 'max_sentence2_length': 355, 'unique_sentence2': 1012}, 'tam-eng': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 152.31, 
'max_sentence1_length': 404, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tam': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 152.31, 'max_sentence2_length': 404, 'unique_sentence2': 1012}, 'tel-eng': {'num_samples': 1012, 'number_of_characters': 264460, 'unique_pairs': 1012, 'min_sentence1_length': 39, 'average_sentence1_length': 130.92, 'max_sentence1_length': 359, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tel': {'num_samples': 1012, 'number_of_characters': 264447, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 39, 'average_sentence2_length': 130.91, 'max_sentence2_length': 359, 'unique_sentence2': 1012}, 'urd-eng': {'num_samples': 1012, 'number_of_characters': 261886, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 128.38, 'max_sentence1_length': 348, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-urd': {'num_samples': 1012, 'number_of_characters': 261885, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 128.38, 'max_sentence2_length': 348, 'unique_sentence2': 1012}, 'asm-eng': {'num_samples': 1012, 'number_of_characters': 257902, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 124.44, 'max_sentence1_length': 329, 'unique_sentence1': 
1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-asm': {'num_samples': 1012, 'number_of_characters': 257909, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 124.45, 'max_sentence2_length': 329, 'unique_sentence2': 1012}, 'bho-eng': {'num_samples': 1012, 'number_of_characters': 260578, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.09, 'max_sentence1_length': 367, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bho': {'num_samples': 1012, 'number_of_characters': 260601, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.11, 'max_sentence2_length': 367, 'unique_sentence2': 1012}, 'nep-eng': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 125.4, 'max_sentence1_length': 362, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-nep': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 125.4, 'max_sentence2_length': 362, 'unique_sentence2': 1012}, 'ory-eng': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 133.24, 'max_sentence1_length': 354, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 
'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ory': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 133.24, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'pan-eng': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 131.84, 'max_sentence1_length': 380, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pan': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 131.84, 'max_sentence2_length': 380, 'unique_sentence2': 1012}, 'pus-eng': {'num_samples': 1012, 'number_of_characters': 254422, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 121.0, 'max_sentence1_length': 325, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pus': {'num_samples': 1012, 'number_of_characters': 254421, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 121.0, 'max_sentence2_length': 325, 'unique_sentence2': 1012}, 'san-eng': {'num_samples': 1012, 'number_of_characters': 260339, 'unique_pairs': 1012, 'min_sentence1_length': 33, 'average_sentence1_length': 126.85, 'max_sentence1_length': 358, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 
'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-san': {'num_samples': 1012, 'number_of_characters': 260224, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 33, 'average_sentence2_length': 126.74, 'max_sentence2_length': 358, 'unique_sentence2': 1011}, 'awa-eng': {'num_samples': 1012, 'number_of_characters': 260179, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 126.69, 'max_sentence1_length': 378, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-awa': {'num_samples': 1012, 'number_of_characters': 260137, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 126.65, 'max_sentence2_length': 378, 'unique_sentence2': 1012}, 'bgc-eng': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.0, 'max_sentence1_length': 332, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bgc': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.0, 'max_sentence2_length': 332, 'unique_sentence2': 1012}, 'bod-eng': {'num_samples': 1012, 'number_of_characters': 280188, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 146.46, 'max_sentence1_length': 431, 'unique_sentence1': 1009, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 
1012}, 'eng-bod': {'num_samples': 1012, 'number_of_characters': 280126, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 146.4, 'max_sentence2_length': 431, 'unique_sentence2': 1009}, 'boy-eng': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 143.85, 'max_sentence1_length': 396, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-boy': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 143.85, 'max_sentence2_length': 396, 'unique_sentence2': 1011}, 'gbm-eng': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 127.53, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gbm': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.53, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'gom-eng': {'num_samples': 1012, 'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 125.71, 'max_sentence1_length': 335, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gom': {'num_samples': 1012, 
'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 125.71, 'max_sentence2_length': 335, 'unique_sentence2': 1012}, 'hne-eng': {'num_samples': 1012, 'number_of_characters': 258911, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 125.44, 'max_sentence1_length': 327, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hne': {'num_samples': 1012, 'number_of_characters': 258915, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 125.44, 'max_sentence2_length': 326, 'unique_sentence2': 1011}, 'raj-eng': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 128.48, 'max_sentence1_length': 338, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-raj': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 128.48, 'max_sentence2_length': 338, 'unique_sentence2': 1012}, 'mai-eng': {'num_samples': 1012, 'number_of_characters': 261374, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.87, 'max_sentence1_length': 350, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mai': {'num_samples': 1012, 'number_of_characters': 261377, 'unique_pairs': 
1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.88, 'max_sentence2_length': 350, 'unique_sentence2': 1012}, 'mni-eng': {'num_samples': 1012, 'number_of_characters': 268767, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 135.18, 'max_sentence1_length': 353, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mni': {'num_samples': 1012, 'number_of_characters': 268768, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 135.18, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'mup-eng': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 40, 'average_sentence1_length': 128.53, 'max_sentence1_length': 340, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mup': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 40, 'average_sentence2_length': 128.53, 'max_sentence2_length': 340, 'unique_sentence2': 1012}, 'mwr-eng': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.22, 'max_sentence1_length': 345, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mwr': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 
'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.22, 'max_sentence2_length': 345, 'unique_sentence2': 1012}, 'sat-eng': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 43, 'average_sentence1_length': 138.13, 'max_sentence1_length': 366, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-sat': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 138.13, 'max_sentence2_length': 366, 'unique_sentence2': 1012}}}} | | [IndicLangClassification](https://arxiv.org/abs/2305.15814) | ['asm', 'ben', 'brx', 'doi', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] | Classification | s2s | [Web, Non-fiction, Written] | None | None | | [IndicNLPNewsClassification](https://github.com/AI4Bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset) (Anoop Kunchukuttan, 2020) | ['guj', 'kan', 'mal', 'mar', 'ori', 'pan', 'tam', 'tel'] | Classification | s2s | [News, Written] | None | None | | [IndicQARetrieval](https://arxiv.org/abs/2212.05409) (Sumanth Doddapaneni, 2022) | ['asm', 'ben', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel'] | Retrieval | s2p | [Web, Written] | None | None | @@ -256,7 +256,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [JSTS](https://aclanthology.org/2022.lrec-1.317.pdf#page=2.00) | ['jpn'] | STS | s2s | [Web, Written] | None | None | | [JaGovFaqsRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Web, Written] | None | None | | [JaQuADRetrieval](https://arxiv.org/abs/2202.01764) (ByungHoon So, 2022) | ['jpn'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | -| [JaqketRetrieval](https://github.com/kumapo/JAQKET-dataset) | ['jpn'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | {'test': 115226} | {'test': {'number_of_characters': 3799.7, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'average_document_length': 0.03, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}} | +| [JaqketRetrieval](https://github.com/kumapo/JAQKET-dataset) | ['jpn'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | {'test': 115226} | {'test': {'number_of_characters': 428294530, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'min_document_length': 16, 'average_document_length': 0.44, 'max_document_length': 98, 'unique_documents': 114229, 'min_query_length': 8, 'average_query_length': 429532.57, 'max_query_length': 188424, 'unique_queries': 997, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 989}} | | [JavaneseIMDBClassification](https://github.com/w11wo/nlp-datasets#javanese-imdb) (Wongso et al., 2021) | ['jav'] | Classification | s2s | [Reviews, Written] | None | None | | [KLUE-NLI](https://arxiv.org/abs/2105.09680) (Sungjoon Park, 2021) | ['kor'] | PairClassification | s2s | [News, Encyclopaedic, Written] | None | None | | [KLUE-STS](https://arxiv.org/abs/2105.09680) (Sungjoon Park, 2021) | ['kor'] | STS | s2s | [Reviews, News, Spoken, Written, Spoken] | None | None | @@ -277,7 +277,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [LEMBQMSumRetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) | ['eng'] | Retrieval | s2p | [Spoken, Written] | None | None | | [LEMBSummScreenFDRetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) | ['eng'] | Retrieval | s2p | [Spoken, Written] | None | None | | [LEMBWikimQARetrieval](https://huggingface.co/datasets/dwzhu/LongEmbed) (Ho et al., 2020) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [LanguageClassification](https://huggingface.co/datasets/papluca/language-identification) (Conneau et al., 2018) | ['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Reviews, Web, Non-fiction, Fiction, Government, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 224352, 'average_text_length': 109.55, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}} | +| [LanguageClassification](https://huggingface.co/datasets/papluca/language-identification) (Conneau et al., 2018) | ['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Reviews, Web, Non-fiction, Fiction, Government, Written] | {'test': 2048, 'train': 70000} | {'test': {'num_samples': 2048, 'number_of_characters': 224352, 'num_texts_in_train': 31, 'min_text_length': 14, 'average_text_length': 109.55, 'max_text_length': 1270, 'unique_text': 2025, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 
102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}, 'train': {'num_samples': 70000, 'number_of_characters': 7760299, 'num_texts_in_train': None, 'min_text_length': 2, 'average_text_length': 110.86, 'max_text_length': 2422, 'unique_text': 68978, 'unique_labels': 20, 'labels': {'12': {'count': 3500}, '1': {'count': 3500}, '19': {'count': 3500}, '15': {'count': 3500}, '13': {'count': 3500}, '11': {'count': 3500}, '17': {'count': 3500}, '14': {'count': 3500}, '16': {'count': 3500}, '5': {'count': 3500}, '0': {'count': 3500}, '8': {'count': 3500}, '7': {'count': 3500}, '2': {'count': 3500}, '3': {'count': 3500}, '10': {'count': 3500}, '6': {'count': 3500}, '18': {'count': 3500}, '4': {'count': 3500}, '9': {'count': 3500}}}} | | [LccSentimentClassification](https://github.com/fnielsen/lcc-sentiment) | ['dan'] | Classification | s2s | [News, Web, Written] | None | None | | [LeCaRDv2](https://github.com/THUIR/LeCaRDv2) (Haitao Li, 2023) | ['zho'] | Retrieval | p2p | [Legal, Written] | None | None | | [LearnedHandsBenefitsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -333,21 +333,21 @@ The following tables give you an overview of the tasks in MTEB. 
| [MassiveScenarioClassification](https://arxiv.org/abs/2204.08582) (Jack FitzGerald, 2022) | ['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] | Classification | s2s | [Spoken] | None | None | | [MedicalQARetrieval](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3119-4) (Asma et al., 2019) | ['eng'] | Retrieval | s2s | [Medical, Written] | None | None | | [MedicalRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | -| [MedrxivClusteringP2P.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | p2p | [Academic, Medical, Written] | None | None | -| [MedrxivClusteringS2S.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Medical, Written] | None | None | +| [MedrxivClusteringP2P.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | p2p | [Academic, Medical, Written] | {'test': 37500} | {'test': {'num_samples': 37500, 'number_of_characters': 74294927, 'min_text_length': 148, 'average_text_length': 1981.2, 'max_text_length': 38759, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care 
research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}} | +| [MedrxivClusteringS2S.v2](https://api.medrxiv.org/) | ['eng'] | Clustering | s2s | [Academic, Medical, Written] | {'test': 37500} | {'test': {'num_samples': 37500, 'number_of_characters': 4301276, 'min_text_length': 18, 'average_text_length': 114.7, 'max_text_length': 339, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 
'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}} | | [MewsC16JaClustering](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Clustering | s2s | [News, Written] | None | None | | [MindSmallReranking](https://msnews.github.io/assets/doc/ACL2020_MIND.pdf) | ['eng'] | Reranking | s2s | [News, Written] | None | None | | MintakaRetrieval | 
['ara', 'deu', 'fra', 'hin', 'ita', 'jpn', 'por', 'spa'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [Moroco](https://huggingface.co/datasets/moroco) (Andrei M. Butnaru, 2019) | ['ron'] | Classification | s2s | [News, Written] | None | None | | [MovieReviewSentimentClassification](https://github.com/TheophileBlard/french-sentiment-analysis-with-bert) (Théophile Blard, 2020) | ['fra'] | Classification | s2s | [Reviews, Written] | None | None | | [MrTidyRetrieval](https://huggingface.co/datasets/castorini/mr-tydi) (Xinyu Zhang, 2021) | ['ara', 'ben', 'eng', 'fin', 'ind', 'jpn', 'kor', 'rus', 'swa', 'tel', 'tha'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [MultiEURLEXMultilabelClassification](https://huggingface.co/datasets/coastalcph/multi_eurlex) (Chalkidis et al., 2021) | ['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] | MultilabelClassification | p2p | [Legal, Government, Written] | {'test': 115000} | {'test': {'average_text_length': 12014.41, 'number_of_characters': 1381657027, 'average_label_per_text': 3.59, 'num_samples': 115000, 'unique_labels': 21, 'labels': {'18': {'count': 50784}, '15': {'count': 30981}, '5': {'count': 24978}, '6': {'count': 45080}, '3': {'count': 63687}, '17': {'count': 37743}, '1': {'count': 15019}, '20': {'count': 14030}, '0': {'count': 17802}, '2': {'count': 22402}, '19': {'count': 10212}, '9': {'count': 3772}, '4': {'count': 9062}, '10': {'count': 7705}, '11': {'count': 12213}, '7': {'count': 14306}, '12': {'count': 11799}, '8': {'count': 13800}, '13': {'count': 2346}, '14': {'count': 4255}, '16': {'count': 1311}}, 'hf_subset_descriptive_stats': {'en': {'average_text_length': 11720.29, 'number_of_characters': 58601463, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 
1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'de': {'average_text_length': 12865.42, 'number_of_characters': 64327081, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fr': {'average_text_length': 13081.11, 'number_of_characters': 65405549, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'it': {'average_text_length': 12763.48, 'number_of_characters': 63817393, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': 
{'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'es': {'average_text_length': 13080.29, 'number_of_characters': 65401450, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pl': {'average_text_length': 12282.59, 'number_of_characters': 61412963, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'ro': {'average_text_length': 12836.93, 'number_of_characters': 64184661, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, 
'8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'nl': {'average_text_length': 12857.97, 'number_of_characters': 64289871, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'el': {'average_text_length': 12998.14, 'number_of_characters': 64990715, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hu': {'average_text_length': 12424.64, 'number_of_characters': 62123205, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pt': {'average_text_length': 12482.46, 
'number_of_characters': 62412308, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'cs': {'average_text_length': 10783.47, 'number_of_characters': 53917338, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sv': {'average_text_length': 11612.48, 'number_of_characters': 58062387, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'bg': {'average_text_length': 12235.43, 'number_of_characters': 61177134, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, 
'15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'da': {'average_text_length': 11773.96, 'number_of_characters': 58869790, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fi': {'average_text_length': 12087.69, 'number_of_characters': 60438431, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sk': {'average_text_length': 11130.81, 'number_of_characters': 55654070, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': 
{'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lt': {'average_text_length': 11245.36, 'number_of_characters': 56226783, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hr': {'average_text_length': 11022.14, 'number_of_characters': 55110710, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sl': {'average_text_length': 10620.06, 'number_of_characters': 53100297, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, 
'11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'et': {'average_text_length': 10898.43, 'number_of_characters': 54492156, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lv': {'average_text_length': 10938.51, 'number_of_characters': 54692551, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'mt': {'average_text_length': 12589.74, 'number_of_characters': 62948721, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 
57}}}}}} | +| [MultiEURLEXMultilabelClassification](https://huggingface.co/datasets/coastalcph/multi_eurlex) (Chalkidis et al., 2021) | ['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] | MultilabelClassification | p2p | [Legal, Government, Written] | None | None | | [MultiHateClassification](https://aclanthology.org/2022.woah-1.15/) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'nld', 'pol', 'por', 'spa'] | Classification | s2s | [Constructed, Written] | None | None | | [MultiLongDocRetrieval](https://arxiv.org/abs/2402.03216) (Jianlv Chen, 2024) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'por', 'rus', 'spa', 'tha'] | Retrieval | s2p | [Encyclopaedic, Written, Web, Non-fiction, Fiction] | None | None | | [MultilingualSentiment](https://github.com/tyqiangz/multilingual-sentiment-datasets) | ['cmn'] | Classification | s2s | | None | None | | [MultilingualSentimentClassification](https://huggingface.co/datasets/mteb/multilingual-sentiment-classification) | ['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] | Classification | s2s | [Reviews, Written] | None | None | | [MyanmarNews](https://huggingface.co/datasets/myanmar_news) (A. H. 
Khine, 2017) | ['mya'] | Classification | p2p | [News, Written] | None | None | -| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | None | None | +| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | | [NFCorpus-PL](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NLPJournalAbsIntroRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | | [NLPJournalTitleAbsRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | @@ -356,7 +356,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [NQ-PL](https://ai.google.com/research/NaturalQuestions/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NQ-PLHardNegatives](https://ai.google.com/research/NaturalQuestions/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NQHardNegatives](https://ai.google.com/research/NaturalQuestions/) (Tom Kwiatkowski, 2019) | ['eng'] | Retrieval | s2p | | None | None | -| [NTREXBitextMining](https://huggingface.co/datasets/davidstap/NTREX) | ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] | BitextMining | s2s | [News, Written] | None | None | +| [NTREXBitextMining](https://huggingface.co/datasets/davidstap/NTREX) | ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 
'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] | BitextMining | s2s | [News, Written] | {'test': 3826252} | {'test': {'num_samples': 3826252, 'number_of_characters': 988355274, 'unique_pairs': 3820263, 'min_sentence1_length': 1, 'average_sentence1_length': 129.15, 'max_sentence1_length': 773, 'unique_sentence1': 241259, 'min_sentence2_length': 1, 'average_sentence2_length': 129.15, 'max_sentence2_length': 773, 'unique_sentence2': 241259, 'hf_subset_descriptive_stats': {'afr_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'afr_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'afr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'afr_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 
'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'afr_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'afr_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'afr_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'afr_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'afr_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'afr_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 
126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'amh_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'amh_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'amh_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'amh_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'amh_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'amh_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 
145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'amh_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'amh_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'amh_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'amh_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'amh_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'amh_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 
134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'amh_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'amh_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'arb_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'arb_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'arb_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'arb_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 
'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'arb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'arb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'arb_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'arb_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'arb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'arb_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 
5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'arb_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'arb_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'arb_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'arb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'arb_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'arb_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 
9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'arb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'arb_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'arb_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'arb_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'arb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'arb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 
'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'arb_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'arb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'arb_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'arb_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'arb_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'arb_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 
1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'arb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'arb_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'arb_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'arb_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'arb_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'aze_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 
'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'aze_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'aze_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'aze_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'aze_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'aze_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'aze_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 
398, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'aze_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'aze_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bak_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'bak_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bak_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'bak_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 
'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'bak_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'bak_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'bak_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'bak_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'bak_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bel_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 
'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bel_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bel_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bel_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bel_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bel_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bel_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 
'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bel_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bel_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bel_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bel_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bel_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bel_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 
1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bem_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bem_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'bem_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'bem_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'bem_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'bem_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 
'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'bem_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'bem_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'ben_Beng-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ben_Beng-deu_Latn': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ben_Beng-div_Thaa': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'ben_Beng-ell_Grek': {'num_samples': 1997, 'number_of_characters': 
542929, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ben_Beng-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ben_Beng-eus_Latn': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'ben_Beng-fas_Arab': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ben_Beng-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ben_Beng-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ben_Beng-guj_Gujr': {'num_samples': 1997, 
'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'ben_Beng-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ben_Beng-hin_Deva': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ben_Beng-hun_Latn': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ben_Beng-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ben_Beng-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ben_Beng-kan_Knda': {'num_samples': 1997, 
'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'ben_Beng-kor_Hang': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ben_Beng-lit_Latn': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ben_Beng-mar_Deva': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'ben_Beng-nep_Deva': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ben_Beng-pan_Guru': {'num_samples': 
1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'ben_Beng-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ben_Beng-por_Latn': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ben_Beng-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ben_Beng-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ben_Beng-snd_Arab': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'ben_Beng-spa_Latn': 
{'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ben_Beng-swa_Latn': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ben_Beng-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ben_Beng-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ben_Beng-tel_Telu': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-tur_Latn': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 
'ben_Beng-urd_Arab': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ben_Beng-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ben_Beng-zho_Hant': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ben_Beng-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'bod_Tibt-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'bod_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 
1997}, 'bod_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'bod_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'bod_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'bod_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'bod_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'bos_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 
'unique_sentence2': 1996}, 'bos_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bos_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bos_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bos_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bos_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bos_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 
468, 'unique_sentence2': 1996}, 'bos_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bos_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bos_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bos_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bos_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bos_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 
'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bul_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bul_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bul_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bul_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bul_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bul_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bul_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bul_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bul_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bul_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bul_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bul_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bul_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'cat_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cat_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'cat_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'cat_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'cat_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 
8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'cat_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'cat_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'cat_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ces_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ces_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ces_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ces_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ces_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ces_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ces_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ces_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ces_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 
1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ces_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ces_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ces_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'ces_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ckb_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ckb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 
'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ckb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ckb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ckb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'ckb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'ckb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'ckb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 
399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'ckb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'ckb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'cym_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cym_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'dan_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'dan_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 
'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'dan_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dan_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'dan_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'dan_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'dan_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'dan_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'dan_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'dan_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'deu_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'deu_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 9, 
'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'deu_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'deu_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'deu_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'deu_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'deu_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'deu_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'deu_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'deu_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'deu_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'deu_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'deu_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'deu_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'deu_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'deu_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'deu_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'deu_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'deu_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'deu_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 
9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'deu_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'deu_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'deu_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'deu_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'deu_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'deu_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 
'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'deu_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'deu_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'deu_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'div_Thaa-ben_Beng': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 
1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'div_Thaa-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'div_Thaa-eus_Latn': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'div_Thaa-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'div_Thaa-hin_Deva': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'div_Thaa-kan_Knda': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'div_Thaa-mar_Deva': {'num_samples': 1997, 'number_of_characters': 564735, 
'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'div_Thaa-nep_Deva': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-pan_Guru': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'div_Thaa-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'div_Thaa-snd_Arab': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'div_Thaa-tam_Taml': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'div_Thaa-tel_Telu': {'num_samples': 1997, 'number_of_characters': 
551375, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-urd_Arab': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'dzo_Tibt-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'dzo_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dzo_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'dzo_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'dzo_Tibt-mon_Mong': {'num_samples': 1997, 
'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'dzo_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'dzo_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'ell_Grek-arb_Arab': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ell_Grek-ben_Beng': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ell_Grek-deu_Latn': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ell_Grek-eng_Latn': {'num_samples': 
1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ell_Grek-fas_Arab': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ell_Grek-fin_Latn': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ell_Grek-fra_Latn': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ell_Grek-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ell_Grek-hin_Deva': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ell_Grek-hun_Latn': 
{'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ell_Grek-hye_Armn': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ell_Grek-ind_Latn': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ell_Grek-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ell_Grek-kat_Geor': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'ell_Grek-kor_Hang': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ell_Grek-lit_Latn': 
{'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ell_Grek-nld_Latn': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ell_Grek-pol_Latn': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ell_Grek-por_Latn': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ell_Grek-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ell_Grek-spa_Latn': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 
'ell_Grek-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ell_Grek-swa_Latn': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ell_Grek-swe_Latn': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ell_Grek-tam_Taml': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ell_Grek-tur_Latn': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ell_Grek-vie_Latn': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 
1996}, 'ell_Grek-zho_Hant': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ell_Grek-zul_Latn': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eng_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'eng_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'eng_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 
'unique_sentence2': 1997}, 'eng_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'eng_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'eng_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'eng_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eng_Latn-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'eng_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 
434, 'unique_sentence2': 1996}, 'eng_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'eng_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'eng_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'eng_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'eng_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'eng_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 
'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'eng_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'eng_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eng_Latn-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'eng_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'eng_Latn-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'eng_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 
119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'eng_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'eng_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'eng_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'eng_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'eng_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'eng_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'eng_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'eng_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eng_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'eng_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'eng_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eng_Latn-hmn_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 165.64, 'max_sentence2_length': 643, 'unique_sentence2': 1997}, 'eng_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'eng_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'eng_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'eng_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'eng_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'eng_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eng_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 
1997, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'eng_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'eng_Latn-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'eng_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'eng_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'eng_Latn-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'eng_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 
'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'eng_Latn-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'eng_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'eng_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'eng_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'eng_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'eng_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 
'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eng_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'eng_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'eng_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'eng_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-mon_Mong': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'eng_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 
437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'eng_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'eng_Latn-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'eng_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'eng_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 
'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'eng_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'eng_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'eng_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'eng_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'eng_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eng_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'eng_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'eng_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'eng_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'eng_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'eng_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 7, 
'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'eng_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'eng_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'eng_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'eng_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 
'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eng_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'eng_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'eng_Latn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'eng_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'eng_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'eng_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 
1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'eng_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'eng_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'eng_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'eng_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eng_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495247, 
'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'eng_Latn-tha_Thai': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'eng_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'eng_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'eng_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'eng_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 
532994, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'eng_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'eng_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'eng_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'eng_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'eng_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'eng_Latn-ven_Latn': {'num_samples': 1997, 
'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'eng_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'eng_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'eng_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'eng_Latn-zho_Hans': {'num_samples': 
1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'eng_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'eng_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eus_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eus_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eus_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'eus_Latn-guj_Gujr': 
{'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eus_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eus_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eus_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eus_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 
'eus_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eus_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eus_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eus_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ewe_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 
1997}, 'ewe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ewe_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ewe_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ewe_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ewe_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ewe_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 
'unique_sentence2': 1995}, 'ewe_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'fao_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fao_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'fao_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fao_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fao_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 
'unique_sentence2': 1996}, 'fao_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'fao_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fao_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'fao_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'fao_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 
'unique_sentence2': 1995}, 'fas_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fas_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'fas_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fas_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fas_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fas_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 
463, 'unique_sentence2': 1996}, 'fas_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fas_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fas_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fas_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fas_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fas_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 
189, 'unique_sentence2': 1994}, 'fas_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'fas_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fas_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fas_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'fas_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fas_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 
'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fas_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fas_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'fas_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'fas_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fas_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'fas_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 
144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fas_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fas_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fas_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'fas_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fas_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 
'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fas_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fas_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fij_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fij_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'fij_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fij_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 
'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fij_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fij_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fij_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fij_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fij_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fij_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 
'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fil_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fil_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'fil_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fil_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fil_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fil_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 
'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fil_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fil_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fil_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fil_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fin_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fin_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fin_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fin_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fin_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fin_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fin_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fin_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fin_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fin_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fin_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fin_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fin_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'fin_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fin_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fin_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fin_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fin_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fin_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fin_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fin_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fin_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fin_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fin_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fin_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 
'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fin_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fra_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fra_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fra_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'fra_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fra_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 
146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fra_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fra_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fra_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'fra_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fra_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fra_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fra_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fra_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'fra_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fra_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fra_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fra_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'fra_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fra_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fra_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'fra_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 
'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fra_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fra_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fra_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fra_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fra_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fra_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 
1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fra_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fra_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fuc_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'fuc_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fuc_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'fuc_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 
'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'fuc_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'fuc_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'fuc_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'fuc_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'gle_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'gle_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 
542529, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'glg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'glg_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'glg_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'glg_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 
549201, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'glg_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'glg_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'guj_Gujr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'guj_Gujr-div_Thaa': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'guj_Gujr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'guj_Gujr-eus_Latn': {'num_samples': 1997, 
'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'guj_Gujr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'guj_Gujr-kan_Knda': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'guj_Gujr-mar_Deva': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'guj_Gujr-nep_Deva': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-pan_Guru': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'guj_Gujr-sin_Sinh': {'num_samples': 
1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'guj_Gujr-snd_Arab': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'guj_Gujr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'guj_Gujr-tel_Telu': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-urd_Arab': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hau_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'hau_Latn-eng_Latn': 
{'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hau_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'hau_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'hau_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'hau_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'hau_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 
'hau_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hau_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'hau_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'hau_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'hau_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'hau_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 
1996}, 'hau_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'heb_Hebr-arb_Arab': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'heb_Hebr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'heb_Hebr-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'heb_Hebr-deu_Latn': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'heb_Hebr-ell_Grek': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 
1996}, 'heb_Hebr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'heb_Hebr-fas_Arab': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'heb_Hebr-fin_Latn': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'heb_Hebr-fra_Latn': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'heb_Hebr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'heb_Hebr-hun_Latn': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 
1997}, 'heb_Hebr-ind_Latn': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'heb_Hebr-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'heb_Hebr-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'heb_Hebr-kor_Hang': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'heb_Hebr-lit_Latn': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'heb_Hebr-mey_Arab': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 
'heb_Hebr-nld_Latn': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'heb_Hebr-pol_Latn': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'heb_Hebr-por_Latn': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'heb_Hebr-prs_Arab': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'heb_Hebr-pus_Arab': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'heb_Hebr-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 
'heb_Hebr-shi_Arab': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'heb_Hebr-spa_Latn': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'heb_Hebr-swa_Latn': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'heb_Hebr-swe_Latn': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'heb_Hebr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'heb_Hebr-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 
'heb_Hebr-tur_Latn': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'heb_Hebr-vie_Latn': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'heb_Hebr-zho_Hant': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'heb_Hebr-zul_Latn': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hin_Deva-arb_Arab': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hin_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 
'hin_Deva-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hin_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'hin_Deva-ell_Grek': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hin_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hin_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'hin_Deva-fas_Arab': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 
1995}, 'hin_Deva-fin_Latn': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hin_Deva-fra_Latn': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hin_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'hin_Deva-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hin_Deva-hun_Latn': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'hin_Deva-ind_Latn': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 
1997}, 'hin_Deva-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hin_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'hin_Deva-kor_Hang': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hin_Deva-lit_Latn': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hin_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'hin_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 
1996}, 'hin_Deva-nld_Latn': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hin_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'hin_Deva-pol_Latn': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hin_Deva-por_Latn': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hin_Deva-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hin_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 
'unique_sentence2': 1996}, 'hin_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'hin_Deva-spa_Latn': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hin_Deva-swa_Latn': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hin_Deva-swe_Latn': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hin_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hin_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 
'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-tur_Latn': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hin_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hin_Deva-vie_Latn': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hin_Deva-zho_Hant': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hin_Deva-zul_Latn': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hmn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 165.64, 'max_sentence1_length': 643, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 
124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'hrv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'hrv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'hrv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'hrv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 
'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'hrv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hrv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hrv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'hrv_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hrv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'hrv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'hrv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'hun_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hun_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hun_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hun_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hun_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hun_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hun_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hun_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hun_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hun_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'hun_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hun_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hun_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hun_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'hun_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hun_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hun_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hun_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hun_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hun_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hun_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hun_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hun_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 
'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hun_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hun_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hun_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hun_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hye_Armn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hye_Armn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hye_Armn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'hye_Armn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ibo_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ibo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ibo_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ibo_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 
1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ibo_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ibo_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ibo_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'ibo_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ibo_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ibo_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ibo_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ibo_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ibo_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ibo_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ind_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ind_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ind_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ind_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ind_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ind_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ind_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ind_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ind_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ind_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ind_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ind_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ind_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ind_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 
486, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ind_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ind_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ind_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ind_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ind_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ind_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ind_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ind_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ind_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ind_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ind_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ind_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ind_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ind_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ind_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'ind_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ind_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'ind_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 
'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ind_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ind_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ind_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'isl_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'isl_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'isl_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 
'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'isl_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'isl_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'isl_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'isl_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'isl_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'isl_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'isl_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ita_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ita_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ita_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ita_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ita_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 
623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ita_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ita_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'ita_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-arb_Arab': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'jpn_Jpan-ben_Beng': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'jpn_Jpan-deu_Latn': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 
189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'jpn_Jpan-ell_Grek': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'jpn_Jpan-eng_Latn': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'jpn_Jpan-fas_Arab': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'jpn_Jpan-fin_Latn': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'jpn_Jpan-fra_Latn': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'jpn_Jpan-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'jpn_Jpan-hin_Deva': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'jpn_Jpan-hun_Latn': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'jpn_Jpan-ind_Latn': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'jpn_Jpan-kor_Hang': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'jpn_Jpan-lit_Latn': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'jpn_Jpan-nld_Latn': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'jpn_Jpan-pol_Latn': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'jpn_Jpan-por_Latn': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'jpn_Jpan-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'jpn_Jpan-spa_Latn': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-swa_Latn': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'jpn_Jpan-swe_Latn': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'jpn_Jpan-tam_Taml': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'jpn_Jpan-tur_Latn': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'jpn_Jpan-vie_Latn': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'jpn_Jpan-yue_Hant': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'jpn_Jpan-zho_Hans': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'jpn_Jpan-zho_Hant': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 
'unique_sentence1': 1994, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'jpn_Jpan-zul_Latn': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'kan_Knda-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kan_Knda-div_Thaa': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'kan_Knda-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kan_Knda-eus_Latn': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'kan_Knda-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 
449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'kan_Knda-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kan_Knda-mar_Deva': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'kan_Knda-nep_Deva': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-pan_Guru': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'kan_Knda-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kan_Knda-snd_Arab': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 
'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'kan_Knda-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kan_Knda-tel_Telu': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-urd_Arab': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'kat_Geor-ell_Grek': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kat_Geor-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kat_Geor-hye_Armn': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 
133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kat_Geor-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'kaz_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kaz_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kaz_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kaz_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'kaz_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 8, 
'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kaz_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kaz_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kaz_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kaz_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'khm_Khmr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'khm_Khmr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 
'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'khm_Khmr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'khm_Khmr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'khm_Khmr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'khm_Khmr-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'khm_Khmr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'kin_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 
1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'kin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kin_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'kin_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'kin_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'kin_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'kin_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'kin_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'kir_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kir_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kir_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kir_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'kir_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 
496790, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kir_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kir_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kir_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kir_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'kmr_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kmr_Latn-ckb_Arab': {'num_samples': 1997, 
'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'kmr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kmr_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kmr_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kmr_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'kmr_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'kmr_Latn-pus_Arab': {'num_samples': 
1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'kmr_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'kmr_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'kor_Hang-arb_Arab': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kor_Hang-ben_Beng': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kor_Hang-deu_Latn': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'kor_Hang-ell_Grek': {'num_samples': 
1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kor_Hang-eng_Latn': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kor_Hang-fas_Arab': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kor_Hang-fin_Latn': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'kor_Hang-fra_Latn': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'kor_Hang-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kor_Hang-hin_Deva': {'num_samples': 1997, 
'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kor_Hang-hun_Latn': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'kor_Hang-ind_Latn': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'kor_Hang-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'kor_Hang-lit_Latn': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'kor_Hang-nld_Latn': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kor_Hang-pol_Latn': {'num_samples': 1997, 
'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'kor_Hang-por_Latn': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'kor_Hang-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'kor_Hang-spa_Latn': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'kor_Hang-swa_Latn': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'kor_Hang-swe_Latn': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'kor_Hang-tam_Taml': {'num_samples': 1997, 
'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kor_Hang-tur_Latn': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kor_Hang-vie_Latn': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'kor_Hang-yue_Hant': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'kor_Hang-zho_Hans': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'kor_Hang-zho_Hant': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'kor_Hang-zul_Latn': {'num_samples': 1997, 
'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'lao_Laoo-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'lao_Laoo-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'lao_Laoo-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lao_Laoo-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'lao_Laoo-mon_Mong': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'lao_Laoo-mya_Mymr': {'num_samples': 
1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'lao_Laoo-tha_Thai': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'lav_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lav_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lav_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lav_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'lit_Latn-arb_Arab': {'num_samples': 
1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'lit_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'lit_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'lit_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'lit_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lit_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'lit_Latn-fin_Latn': 
{'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lit_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'lit_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'lit_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'lit_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lit_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'lit_Latn-jpn_Jpan': 
{'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'lit_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'lit_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'lit_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'lit_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'lit_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'lit_Latn-rus_Cyrl': 
{'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'lit_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'lit_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'lit_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'lit_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'lit_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 
'lit_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'lit_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'lit_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ltz_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ltz_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'ltz_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 
1996}, 'ltz_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ltz_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'ltz_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'ltz_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ltz_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'ltz_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 
'unique_sentence2': 1996}, 'ltz_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'mal_Mlym-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mal_Mlym-fij_Latn': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mal_Mlym-fil_Latn': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mal_Mlym-ind_Latn': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mal_Mlym-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 
'unique_sentence2': 1994}, 'mal_Mlym-mri_Latn': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mal_Mlym-msa_Latn': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mal_Mlym-smo_Latn': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mal_Mlym-tah_Latn': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mal_Mlym-ton_Latn': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mar_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 
'unique_sentence2': 1997}, 'mar_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'mar_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mar_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'mar_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'mar_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'mar_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 
449, 'unique_sentence2': 1996}, 'mar_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'mar_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'mar_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'mar_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'mar_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 
'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'mey_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'mey_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'mey_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mey_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'mey_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 
100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'mey_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'mey_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'mey_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'mey_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'mey_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'mkd_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 
'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'mkd_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'mkd_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'mkd_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'mkd_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mkd_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'mkd_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mkd_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'mkd_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'mkd_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'mkd_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'mkd_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'mkd_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 
1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'mlg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlg_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mlg_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mlg_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mlg_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mlg_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 
'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mlg_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mlg_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mlg_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mlg_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mlt_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'mlt_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 
582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlt_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'mlt_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mlt_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'mlt_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'mlt_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'mlt_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 
'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'mon_Mong-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mon_Mong-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mon_Mong-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mon_Mong-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mon_Mong-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mon_Mong-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 11, 
'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'mon_Mong-tha_Thai': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'mri_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mri_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mri_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mri_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mri_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 
'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mri_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mri_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mri_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mri_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mri_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'msa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 
1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'msa_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'msa_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'msa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'msa_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'msa_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'msa_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 
1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'msa_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'msa_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'msa_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mya_Mymr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mya_Mymr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mya_Mymr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 
1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mya_Mymr-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mya_Mymr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mya_Mymr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'mya_Mymr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'nde_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nde_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 
'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nde_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nde_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nde_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nde_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'nde_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nde_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 
597495, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'nep_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nep_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'nep_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nep_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'nep_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'nep_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 
509798, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nep_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'nep_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'nep_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'nep_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'nep_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'nep_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 
558184, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nep_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'nep_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'nld_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'nld_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nld_Latn-dan_Latn': {'num_samples': 1997, 
'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nld_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nld_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'nld_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nld_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nld_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'nld_Latn-fin_Latn': {'num_samples': 
1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'nld_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'nld_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'nld_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nld_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'nld_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'nld_Latn-isl_Latn': {'num_samples': 
1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nld_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'nld_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'nld_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'nld_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nld_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nld_Latn-nob_Latn': {'num_samples': 
1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nld_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'nld_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'nld_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'nld_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'nld_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nld_Latn-swe_Latn': 
{'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nld_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nld_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'nld_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'nld_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 
'nno_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nno_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nno_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nno_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nno_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nno_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 
1996}, 'nno_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nno_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nno_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nno_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nob_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nob_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 
'unique_sentence2': 1995}, 'nob_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nob_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nob_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nob_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nob_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nob_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 
539, 'unique_sentence2': 1996}, 'nob_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nob_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nso_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'nso_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nso_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'nso_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 
'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'nso_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'nso_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'nso_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'nso_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nso_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'nso_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 
167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'nso_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'nso_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'nso_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'nso_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nya_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nya_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 
'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nya_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nya_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nya_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nya_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'nya_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nya_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 
'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'orm_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'orm_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'orm_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'orm_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'orm_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'orm_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 
1984, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'orm_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'orm_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'orm_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'orm_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'orm_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'orm_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 
'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'orm_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'orm_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'pan_Guru-ben_Beng': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pan_Guru-div_Thaa': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'pan_Guru-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pan_Guru-eus_Latn': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'pan_Guru-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'pan_Guru-hin_Deva': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pan_Guru-kan_Knda': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'pan_Guru-mar_Deva': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'pan_Guru-nep_Deva': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 
'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'pan_Guru-snd_Arab': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'pan_Guru-tam_Taml': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pan_Guru-tel_Telu': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-urd_Arab': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'pol_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pol_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 
468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'pol_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pol_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'pol_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'pol_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'pol_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'pol_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 
'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'pol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pol_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pol_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'pol_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pol_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pol_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'pol_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'pol_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'pol_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'pol_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'pol_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'pol_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'pol_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'pol_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'pol_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 9, 
'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'pol_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'pol_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'pol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'pol_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'pol_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 
'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pol_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'pol_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'pol_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'pol_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'pol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'por_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 
1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'por_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'por_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'por_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'por_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'por_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'por_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 520554, 
'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'por_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'por_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'por_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'por_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'por_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 
555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'por_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'por_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'por_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'por_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'por_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'por_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 
554690, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'por_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'por_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'por_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'por_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'por_Latn-swa_Latn': {'num_samples': 1997, 
'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'por_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'por_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'por_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'por_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'por_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'por_Latn-zul_Latn': {'num_samples': 
1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'prs_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'prs_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'prs_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'prs_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'prs_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'prs_Arab-kmr_Latn': 
{'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'prs_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'prs_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'prs_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'prs_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'pus_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 
'pus_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'pus_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pus_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pus_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pus_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'pus_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 
'pus_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'pus_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'pus_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'ron_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ron_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ron_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 
1996}, 'ron_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ron_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'ron_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ron_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ron_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 
'unique_sentence2': 1995}, 'rus_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'rus_Cyrl-ben_Beng': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'rus_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'rus_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'rus_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'rus_Cyrl-deu_Latn': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 
508, 'unique_sentence2': 1996}, 'rus_Cyrl-ell_Grek': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'rus_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'rus_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'rus_Cyrl-fin_Latn': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-fra_Latn': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'rus_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 
'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'rus_Cyrl-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'rus_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-hun_Latn': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'rus_Cyrl-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'rus_Cyrl-kor_Hang': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 
'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'rus_Cyrl-lit_Latn': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'rus_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'rus_Cyrl-nld_Latn': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'rus_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'rus_Cyrl-por_Latn': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'rus_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 
126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'rus_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-spa_Latn': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'rus_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'rus_Cyrl-swa_Latn': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'rus_Cyrl-swe_Latn': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'rus_Cyrl-tam_Taml': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'rus_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'rus_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'rus_Cyrl-vie_Latn': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'rus_Cyrl-zho_Hant': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'rus_Cyrl-zul_Latn': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 
8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'shi_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'shi_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'shi_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'shi_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'shi_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'shi_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'shi_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'shi_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'shi_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'shi_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'sin_Sinh-ben_Beng': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'sin_Sinh-div_Thaa': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 
1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'sin_Sinh-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sin_Sinh-eus_Latn': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'sin_Sinh-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'sin_Sinh-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'sin_Sinh-kan_Knda': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'sin_Sinh-mar_Deva': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 
'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'sin_Sinh-nep_Deva': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-pan_Guru': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'sin_Sinh-snd_Arab': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'sin_Sinh-tam_Taml': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'sin_Sinh-tel_Telu': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-urd_Arab': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 
441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'slk_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slk_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slk_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slk_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slk_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 
'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slk_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slk_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slk_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slk_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'slk_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slk_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 
126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slk_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'slv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 6, 
'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slv_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'slv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 
'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'smo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'smo_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'smo_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'smo_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 
1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'smo_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'smo_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'smo_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'smo_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'smo_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'smo_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 
'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'sna_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'sna_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sna_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'sna_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'sna_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'sna_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 
596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'sna_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'sna_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'snd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'snd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'snd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'snd_Arab-eus_Latn': {'num_samples': 1997, 
'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'snd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'snd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'snd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'snd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'snd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-pan_Guru': {'num_samples': 
1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'snd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'snd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'snd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-urd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'som_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'som_Latn-eng_Latn': 
{'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'som_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'som_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'som_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'som_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'som_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 
'som_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'som_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'som_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'som_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'som_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'som_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 
1996}, 'som_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'spa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'spa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'spa_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'spa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'spa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 
'unique_sentence2': 1996}, 'spa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'spa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'spa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'spa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'spa_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 
375, 'unique_sentence2': 1996}, 'spa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'spa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'spa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'spa_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'spa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'spa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 
217, 'unique_sentence2': 1995}, 'spa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'spa_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'spa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'spa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'spa_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 
'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'spa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'spa_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'spa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'spa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'spa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'spa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 
135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'spa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'spa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'sqi_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'sqi_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sqi_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'sqi_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 
'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'srp_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 
7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 
'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'srp_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'srp_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 
'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'srp_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ssw_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ssw_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ssw_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ssw_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 
510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'ssw_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ssw_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ssw_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ssw_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ssw_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ssw_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 
'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ssw_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ssw_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ssw_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ssw_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swa_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'swa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 
136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 10, 
'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swa_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'swa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swa_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 
'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'swa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swa_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 
1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'swa_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'swa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swa_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'swa_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swa_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'swa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'swa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swa_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'swa_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'swa_Latn-tur_Latn': {'num_samples': 1997, 
'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swa_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'swa_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'swa_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'swa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swa_Latn-zul_Latn': 
{'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swe_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swe_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swe_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'swe_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swe_Latn-ell_Grek': 
{'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swe_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'swe_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swe_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swe_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swe_Latn-heb_Hebr': 
{'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swe_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swe_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swe_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swe_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'swe_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swe_Latn-kor_Hang': 
{'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swe_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swe_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'swe_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swe_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'swe_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'swe_Latn-pol_Latn': 
{'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swe_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swe_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swe_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swe_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'swe_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swe_Latn-tur_Latn': 
{'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swe_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swe_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tah_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tah_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'tah_Latn-fil_Latn': 
{'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'tah_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tah_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'tah_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'tah_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'tah_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 
'tah_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'tah_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'tam_Taml-arb_Arab': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tam_Taml-ben_Beng': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tam_Taml-deu_Latn': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tam_Taml-div_Thaa': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 
'unique_sentence2': 1996}, 'tam_Taml-ell_Grek': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tam_Taml-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tam_Taml-eus_Latn': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tam_Taml-fas_Arab': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tam_Taml-fin_Latn': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tam_Taml-fra_Latn': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 
'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tam_Taml-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tam_Taml-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tam_Taml-hin_Deva': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tam_Taml-hun_Latn': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tam_Taml-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tam_Taml-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 
55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tam_Taml-kan_Knda': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tam_Taml-kor_Hang': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tam_Taml-lit_Latn': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tam_Taml-mar_Deva': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tam_Taml-nep_Deva': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-nld_Latn': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 
'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tam_Taml-pan_Guru': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tam_Taml-pol_Latn': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tam_Taml-por_Latn': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tam_Taml-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tam_Taml-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tam_Taml-snd_Arab': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 
'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tam_Taml-spa_Latn': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tam_Taml-swa_Latn': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tam_Taml-swe_Latn': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tam_Taml-tel_Telu': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-tur_Latn': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tam_Taml-urd_Arab': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 
'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tam_Taml-vie_Latn': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tam_Taml-zho_Hant': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tam_Taml-zul_Latn': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tat_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tat_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tat_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 
'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tat_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tat_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tat_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tat_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tat_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tat_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 
123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tel_Telu-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tel_Telu-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tel_Telu-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tel_Telu-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tel_Telu-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tel_Telu-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 10, 
'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tel_Telu-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tel_Telu-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tel_Telu-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tel_Telu-pan_Guru': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tel_Telu-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tel_Telu-snd_Arab': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 
'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tel_Telu-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tel_Telu-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tgk_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tgk_Cyrl-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'tgk_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tgk_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 517486, 
'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tgk_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tgk_Cyrl-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'tgk_Cyrl-mey_Arab': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'tgk_Cyrl-prs_Arab': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'tgk_Cyrl-pus_Arab': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'tgk_Cyrl-shi_Arab': {'num_samples': 1997, 'number_of_characters': 
489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'tha_Thai-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'tha_Thai-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'tha_Thai-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tha_Thai-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'tha_Thai-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'tha_Thai-mon_Mong': {'num_samples': 1997, 
'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'tha_Thai-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'tir_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tir_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tir_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tir_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tir_Ethi-nso_Latn': {'num_samples': 1997, 
'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tir_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tir_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tir_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tir_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tir_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'tir_Ethi-wol_Latn': {'num_samples': 1997, 
'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tir_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tir_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tir_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ton_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ton_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ton_Latn-fil_Latn': {'num_samples': 
1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ton_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ton_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ton_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ton_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ton_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ton_Latn-smo_Latn': 
{'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ton_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'tsn_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tsn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tsn_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tsn_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 
'tsn_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tsn_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tsn_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tsn_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tsn_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tsn_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 
1996}, 'tsn_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tsn_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tsn_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tsn_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tuk_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tuk_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 
'unique_sentence2': 1995}, 'tuk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tuk_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tuk_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tuk_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tuk_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tuk_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 
592, 'unique_sentence2': 1996}, 'tuk_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tur_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tur_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tur_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tur_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 
'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tur_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tur_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tur_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tur_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tur_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tur_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 
100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tur_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tur_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tur_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tur_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tur_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tur_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 
125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tur_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tur_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tur_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tur_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tur_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 
'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tur_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tur_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tur_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tur_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tur_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 
'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tur_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tur_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tur_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tur_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'uig_Arab-aze_Latn': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 
1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uig_Arab-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uig_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uig_Arab-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uig_Arab-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uig_Arab-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uig_Arab-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 
'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uig_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uig_Arab-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'ukr_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ukr_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ukr_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ukr_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 
440, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'ukr_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ukr_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ukr_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ukr_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ukr_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ukr_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 
'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ukr_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ukr_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ukr_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'urd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'urd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'urd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 
124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'urd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'urd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'urd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'urd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'urd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'urd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 7, 
'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'urd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'urd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'urd_Arab-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'urd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'urd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'uzb_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 
'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uzb_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uzb_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uzb_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uzb_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uzb_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uzb_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 
1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uzb_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uzb_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'ven_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'ven_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ven_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'ven_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 
'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ven_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ven_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ven_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ven_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'vie_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'vie_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 
514923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'vie_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'vie_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'vie_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'vie_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'vie_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'vie_Latn-fra_Latn': {'num_samples': 1997, 
'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'vie_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'vie_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'vie_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'vie_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'vie_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'vie_Latn-kor_Hang': {'num_samples': 1997, 
'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'vie_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'vie_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'vie_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'vie_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'vie_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'vie_Latn-spa_Latn': {'num_samples': 
1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'vie_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'vie_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'vie_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'vie_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'vie_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'vie_Latn-zho_Hans': 
{'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'vie_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'vie_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'wol_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'wol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'wol_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 
'wol_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'wol_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'wol_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'wol_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'wol_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'wol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 
1997}, 'wol_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'wol_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'wol_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'wol_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'wol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'xho_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 
'unique_sentence2': 1994}, 'xho_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'xho_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'xho_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'xho_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'xho_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'xho_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 
455, 'unique_sentence2': 1997}, 'xho_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'xho_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'xho_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'xho_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'xho_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'xho_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 
'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'xho_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yor_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'yor_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yor_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'yor_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'yor_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 
145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'yor_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'yor_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'yor_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'yor_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'yor_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'yor_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 
'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'yor_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'yor_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'yor_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yue_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yue_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'yue_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 
'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'yue_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'yue_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'yue_Hant-zho_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hans-eng_Latn': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hans-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hans-kor_Hang': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 
'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hans-vie_Latn': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hans-yue_Hant': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hans-zho_Hant': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hant-arb_Arab': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zho_Hant-ben_Beng': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zho_Hant-deu_Latn': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 
'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zho_Hant-ell_Grek': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zho_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hant-fas_Arab': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zho_Hant-fin_Latn': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zho_Hant-fra_Latn': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zho_Hant-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 
'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zho_Hant-hin_Deva': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zho_Hant-hun_Latn': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zho_Hant-ind_Latn': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zho_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hant-lit_Latn': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 
'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zho_Hant-nld_Latn': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zho_Hant-pol_Latn': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zho_Hant-por_Latn': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zho_Hant-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zho_Hant-spa_Latn': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zho_Hant-swa_Latn': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 
'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zho_Hant-swe_Latn': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zho_Hant-tam_Taml': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zho_Hant-tur_Latn': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zho_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hant-yue_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 
'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'zho_Hant-zul_Latn': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'zul_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'zul_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zul_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zul_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zul_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 
7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zul_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zul_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zul_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zul_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zul_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'zul_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 
'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zul_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zul_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zul_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'zul_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zul_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zul_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 
'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zul_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zul_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zul_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'zul_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'zul_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zul_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 
1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zul_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zul_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'zul_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zul_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'zul_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zul_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 
'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zul_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zul_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'zul_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'zul_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zul_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zul_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 
494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'zul_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'zul_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'zul_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}}}} | | [NYSJudicialEthicsLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [NaijaSenti](https://github.com/hausanlp/NaijaSenti) | ['hau', 'ibo', 'pcm', 'yor'] | Classification | s2s | [Social, Written] | None | None | | [NarrativeQARetrieval](https://metatext.io/datasets/narrativeqa) (Tomáš Kočiský, 2017) | ['eng'] | Retrieval | s2p | | None | None | @@ -368,14 +368,14 @@ The following tables give you an overview of the tasks in MTEB. 
| [News21InstructionRetrieval](https://arxiv.org/abs/2403.15246) (Orion Weller, 2024) | ['eng'] | InstructionRetrieval | s2p | [News, Written] | None | None | | [NewsClassification](https://arxiv.org/abs/1509.01626) (Zhang et al., 2015) | ['eng'] | Classification | s2s | [News, Written] | None | None | | [NoRecClassification](https://aclanthology.org/L18-1661/) | ['nob'] | Classification | s2s | [Written, Reviews] | None | None | -| [NollySentiBitextMining](https://github.com/IyanuSh/NollySenti) (Shode et al., 2023) | ['eng', 'hau', 'ibo', 'pcm', 'yor'] | BitextMining | s2s | [Social, Reviews, Written] | None | None | +| [NollySentiBitextMining](https://github.com/IyanuSh/NollySenti) (Shode et al., 2023) | ['eng', 'hau', 'ibo', 'pcm', 'yor'] | BitextMining | s2s | [Social, Reviews, Written] | {'train': 1640} | {'train': {'num_samples': 1640, 'number_of_characters': 445805, 'unique_pairs': 1632, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 135.52, 'max_sentence2_length': 1728, 'unique_sentence2': 1631, 'hf_subset_descriptive_stats': {'en-ha': {'num_samples': 410, 'number_of_characters': 115348, 'unique_pairs': 407, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 4, 'average_sentence2_length': 145.02, 'max_sentence2_length': 1728, 'unique_sentence2': 407}, 'en-ig': {'num_samples': 410, 'number_of_characters': 107173, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 5, 'average_sentence2_length': 125.08, 'max_sentence2_length': 1137, 'unique_sentence2': 408}, 'en-pcm': {'num_samples': 410, 'number_of_characters': 109955, 'unique_pairs': 408, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 
'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 131.87, 'max_sentence2_length': 1552, 'unique_sentence2': 408}, 'en-yo': {'num_samples': 410, 'number_of_characters': 113329, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 6, 'average_sentence2_length': 140.1, 'max_sentence2_length': 1338, 'unique_sentence2': 409}}}} | | [NorQuadRetrieval](https://aclanthology.org/2023.nodalida-1.17/) | ['nob'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [NordicLangClassification](https://aclanthology.org/2021.vardial-1.8/) | ['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] | Classification | s2s | [Encyclopaedic] | None | None | -| [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal, Written] | None | None | +| [NorwegianCourtsBitextMining](https://opus.nlpl.eu/index.php) (Tiedemann et al., 2020) | ['nno', 'nob'] | BitextMining | s2s | [Legal, Written] | {'test': 228} | {'test': {'num_samples': 228, 'number_of_characters': 37441, 'unique_pairs': 228, 'min_sentence1_length': 13, 'average_sentence1_length': 82.2, 'max_sentence1_length': 272, 'unique_sentence1': 227, 'min_sentence2_length': 10, 'average_sentence2_length': 82.02, 'max_sentence2_length': 269, 'unique_sentence2': 226}} | | [NorwegianParliamentClassification](https://huggingface.co/datasets/NbAiLab/norwegian_parliament) | ['nob'] | Classification | s2s | [Government, Spoken] | None | None | | [NusaParagraphEmotionClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | Classification | s2s | [Non-fiction, Fiction, Written] | None | None | | [NusaParagraphTopicClassification](https://github.com/IndoNLP/nusa-writes) | ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | 
Classification | s2s | [Non-fiction, Fiction, Written] | None | None | -| [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social, Written] | {'train': 50200} | {'train': {'average_sentence1_length': 145.46, 'average_sentence2_length': 148.57, 'num_samples': 50200, 'number_of_characters': 14759870, 'hf_subset_descriptive_stats': {'ind-abs': {'average_sentence1_length': 148.37, 'average_sentence2_length': 147.31, 'num_samples': 1000, 'number_of_characters': 295680}, 'ind-btk': {'average_sentence1_length': 145.37, 'average_sentence2_length': 146.74, 'num_samples': 6600, 'number_of_characters': 1927907}, 'ind-bew': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.41, 'num_samples': 6600, 'number_of_characters': 1939300}, 'ind-bhp': {'average_sentence1_length': 133.53, 'average_sentence2_length': 128.14, 'num_samples': 1000, 'number_of_characters': 261666}, 'ind-jav': {'average_sentence1_length': 145.43, 'average_sentence2_length': 145.81, 'num_samples': 6600, 'number_of_characters': 1922162}, 'ind-mad': {'average_sentence1_length': 145.36, 'average_sentence2_length': 153.62, 'num_samples': 6600, 'number_of_characters': 1973257}, 'ind-mak': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.61, 'num_samples': 6600, 'number_of_characters': 1953868}, 'ind-min': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.06, 'num_samples': 6600, 'number_of_characters': 1937033}, 'ind-mui': {'average_sentence1_length': 150.45, 'average_sentence2_length': 150.99, 'num_samples': 1000, 'number_of_characters': 301448}, 'ind-rej': {'average_sentence1_length': 151.62, 'average_sentence2_length': 139.58, 'num_samples': 1000, 'number_of_characters': 291205}, 'ind-sun': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.99, 'num_samples': 
6600, 'number_of_characters': 1956344}}}} | +| [NusaTranslationBitextMining](https://huggingface.co/datasets/indonlp/nusatranslation_mt) (Cahyawijaya et al., 2023) | ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] | BitextMining | s2s | [Social, Written] | {'train': 50200} | {'train': {'num_samples': 50200, 'number_of_characters': 14759870, 'unique_pairs': 50140, 'min_sentence1_length': 5, 'average_sentence1_length': 145.46, 'max_sentence1_length': 873, 'unique_sentence1': 8258, 'min_sentence2_length': 5, 'average_sentence2_length': 148.57, 'max_sentence2_length': 980, 'unique_sentence2': 50102, 'hf_subset_descriptive_stats': {'ind-abs': {'num_samples': 1000, 'number_of_characters': 295680, 'unique_pairs': 999, 'min_sentence1_length': 5, 'average_sentence1_length': 148.37, 'max_sentence1_length': 727, 'unique_sentence1': 998, 'min_sentence2_length': 6, 'average_sentence2_length': 147.31, 'max_sentence2_length': 629, 'unique_sentence2': 998}, 'ind-btk': {'num_samples': 6600, 'number_of_characters': 1927907, 'unique_pairs': 6597, 'min_sentence1_length': 5, 'average_sentence1_length': 145.37, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 146.74, 'max_sentence2_length': 980, 'unique_sentence2': 6596}, 'ind-bew': {'num_samples': 6600, 'number_of_characters': 1939300, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.41, 'max_sentence2_length': 840, 'unique_sentence2': 6590}, 'ind-bhp': {'num_samples': 1000, 'number_of_characters': 261666, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 133.53, 'max_sentence1_length': 468, 'unique_sentence1': 999, 'min_sentence2_length': 10, 'average_sentence2_length': 128.14, 'max_sentence2_length': 459, 'unique_sentence2': 999}, 'ind-jav': {'num_samples': 6600, 
'number_of_characters': 1922162, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 145.81, 'max_sentence2_length': 854, 'unique_sentence2': 6585}, 'ind-mad': {'num_samples': 6600, 'number_of_characters': 1973257, 'unique_pairs': 6598, 'min_sentence1_length': 5, 'average_sentence1_length': 145.36, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 153.62, 'max_sentence2_length': 827, 'unique_sentence2': 6592}, 'ind-mak': {'num_samples': 6600, 'number_of_characters': 1953868, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 150.61, 'max_sentence2_length': 888, 'unique_sentence2': 6586}, 'ind-min': {'num_samples': 6600, 'number_of_characters': 1937033, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.06, 'max_sentence2_length': 837, 'unique_sentence2': 6591}, 'ind-mui': {'num_samples': 1000, 'number_of_characters': 301448, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 150.45, 'max_sentence1_length': 451, 'unique_sentence1': 997, 'min_sentence2_length': 11, 'average_sentence2_length': 150.99, 'max_sentence2_length': 450, 'unique_sentence2': 1000}, 'ind-rej': {'num_samples': 1000, 'number_of_characters': 291205, 'unique_pairs': 1000, 'min_sentence1_length': 9, 'average_sentence1_length': 151.62, 'max_sentence1_length': 873, 'unique_sentence1': 998, 'min_sentence2_length': 8, 'average_sentence2_length': 139.58, 'max_sentence2_length': 784, 'unique_sentence2': 1000}, 'ind-sun': {'num_samples': 6600, 'number_of_characters': 1956344, 'unique_pairs': 
6591, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 150.99, 'max_sentence2_length': 881, 'unique_sentence2': 6588}}}} | | [NusaX-senti](https://arxiv.org/abs/2205.15960) (Winata et al., 2022) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | Classification | s2s | [Reviews, Web, Social, Constructed, Written] | None | None | | [NusaXBitextMining](https://huggingface.co/datasets/indonlp/NusaX-senti/) (Winata et al., 2023) | ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] | BitextMining | s2s | [Reviews, Written] | None | None | | [OPP115DataRetentionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -400,10 +400,10 @@ The following tables give you an overview of the tasks in MTEB. 
| [PROALegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [PSC](http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf) | ['pol'] | PairClassification | s2s | [News, Written] | None | None | | [PatentClassification](https://aclanthology.org/P19-1212.pdf) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [PawsXPairClassification](https://arxiv.org/abs/1908.11828) (Yinfei Yang, 2019) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] | PairClassification | s2s | [Web, Encyclopaedic, Written] | {'test': 14000, 'validation': 14000} | {'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'avg_sentence1_len': 91.18, 'avg_sentence2_len': 91.1, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'avg_sentence1_len': 119.78, 'avg_sentence2_len': 119.24, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'avg_sentence1_len': 113.76, 'avg_sentence2_len': 113.42, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'avg_sentence1_len': 117.81, 'avg_sentence2_len': 117.8, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'avg_sentence1_len': 120.03, 'avg_sentence2_len': 119.99, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 235106, 'avg_sentence1_len': 58.68, 'avg_sentence2_len': 58.88, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'avg_sentence1_len': 64.96, 'avg_sentence2_len': 65.11, 
'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'avg_sentence1_len': 43.23, 'avg_sentence2_len': 43.27, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'avg_sentence1_len': 90.13, 'avg_sentence2_len': 90.2, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'avg_sentence1_len': 116.82, 'avg_sentence2_len': 117.0, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'avg_sentence1_len': 113.11, 'avg_sentence2_len': 112.86, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'avg_sentence1_len': 116.33, 'avg_sentence2_len': 116.73, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'avg_sentence1_len': 119.5, 'avg_sentence2_len': 119.75, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'avg_sentence1_len': 57.51, 'avg_sentence2_len': 57.32, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'avg_sentence1_len': 65.16, 'avg_sentence2_len': 65.52, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'avg_sentence1_len': 42.45, 'avg_sentence2_len': 42.26, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': {'count': 1147}}}}}} | +| [PawsXPairClassification](https://arxiv.org/abs/1908.11828) (Yinfei Yang, 2019) | ['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] | 
PairClassification | s2s | [Web, Encyclopaedic, Written] | {'test': 14000, 'validation': 14000} | {'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'min_sentence1_length': 2, 'avg_sentence1_length': 91.18, 'max_sentence1_length': 268, 'unique_sentence1': 13404, 'min_sentence2_length': 2, 'avg_sentence2_length': 91.1, 'max_sentence2_length': 247, 'unique_sentence2': 13462, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.78, 'max_sentence1_length': 268, 'unique_sentence1': 1934, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.24, 'max_sentence2_length': 235, 'unique_sentence2': 1938, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.76, 'max_sentence1_length': 209, 'unique_sentence1': 1761, 'min_sentence2_length': 25, 'avg_sentence2_length': 113.42, 'max_sentence2_length': 209, 'unique_sentence2': 1800, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'min_sentence1_length': 2, 'avg_sentence1_length': 117.81, 'max_sentence1_length': 226, 'unique_sentence1': 1955, 'min_sentence2_length': 22, 'avg_sentence2_length': 117.8, 'max_sentence2_length': 233, 'unique_sentence2': 1959, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'min_sentence1_length': 2, 'avg_sentence1_length': 120.03, 'max_sentence1_length': 238, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.99, 'max_sentence2_length': 247, 'unique_sentence2': 1953, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 
'number_of_characters': 235106, 'min_sentence1_length': 2, 'avg_sentence1_length': 58.68, 'max_sentence1_length': 192, 'unique_sentence1': 1944, 'min_sentence2_length': 2, 'avg_sentence2_length': 58.88, 'max_sentence2_length': 198, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'min_sentence1_length': 2, 'avg_sentence1_length': 64.96, 'max_sentence1_length': 153, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.11, 'max_sentence2_length': 159, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'min_sentence1_length': 2, 'avg_sentence1_length': 43.23, 'max_sentence1_length': 120, 'unique_sentence1': 1909, 'min_sentence2_length': 2, 'avg_sentence2_length': 43.27, 'max_sentence2_length': 113, 'unique_sentence2': 1909, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'min_sentence1_length': 2, 'avg_sentence1_length': 90.13, 'max_sentence1_length': 248, 'unique_sentence1': 13357, 'min_sentence2_length': 2, 'avg_sentence2_length': 90.2, 'max_sentence2_length': 275, 'unique_sentence2': 13397, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.82, 'max_sentence1_length': 248, 'unique_sentence1': 1914, 'min_sentence2_length': 2, 'avg_sentence2_length': 117.0, 'max_sentence2_length': 275, 'unique_sentence2': 1920, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.11, 'max_sentence1_length': 213, 'unique_sentence1': 
1758, 'min_sentence2_length': 25, 'avg_sentence2_length': 112.86, 'max_sentence2_length': 213, 'unique_sentence2': 1771, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.33, 'max_sentence1_length': 240, 'unique_sentence1': 1938, 'min_sentence2_length': 2, 'avg_sentence2_length': 116.73, 'max_sentence2_length': 241, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.5, 'max_sentence1_length': 233, 'unique_sentence1': 1933, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.75, 'max_sentence2_length': 246, 'unique_sentence2': 1939, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'min_sentence1_length': 2, 'avg_sentence1_length': 57.51, 'max_sentence1_length': 126, 'unique_sentence1': 1957, 'min_sentence2_length': 2, 'avg_sentence2_length': 57.32, 'max_sentence2_length': 121, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'min_sentence1_length': 2, 'avg_sentence1_length': 65.16, 'max_sentence1_length': 178, 'unique_sentence1': 1963, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.52, 'max_sentence2_length': 174, 'unique_sentence2': 1968, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'min_sentence1_length': 2, 'avg_sentence1_length': 42.45, 'max_sentence1_length': 101, 'unique_sentence1': 1899, 'min_sentence2_length': 2, 'avg_sentence2_length': 42.26, 'max_sentence2_length': 120, 'unique_sentence2': 1895, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': 
{'count': 1147}}}}}} | | [PersianFoodSentimentClassification](https://hooshvare.github.io/docs/datasets/sa) (Mehrdad Farahani et al., 2020) | ['fas'] | Classification | s2s | [Reviews, Written] | None | None | | [PersonalJurisdictionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | -| [PhincBitextMining](https://huggingface.co/datasets/veezbo/phinc) (Srivastava et al., 2020) | ['eng', 'hin'] | BitextMining | s2s | [Social, Written] | None | None | +| [PhincBitextMining](https://huggingface.co/datasets/veezbo/phinc) (Srivastava et al., 2020) | ['eng', 'hin'] | BitextMining | s2s | [Social, Written] | {'train': 13738} | {'train': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736, 'hf_subset_descriptive_stats': {'eng-eng_hin': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736}}}} | | [PlscClusteringP2P.v2](https://huggingface.co/datasets/rafalposwiata/plsc) | ['pol'] | Clustering | s2s | [Academic, Written] | None | None | | [PlscClusteringS2S.v2](https://huggingface.co/datasets/rafalposwiata/plsc) | ['pol'] | Clustering | s2s | [Academic, Written] | None | None | | [PoemSentimentClassification](https://arxiv.org/abs/2011.02686) (Emily Sheng, 2020) | ['eng'] | Classification | s2s | [Reviews, Written] | None | None | @@ -423,7 +423,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [RTE3](https://aclanthology.org/W07-1401/) | ['deu', 'eng', 'fra', 'ita'] | PairClassification | s2s | [News, Web, Encyclopaedic, Written] | None | None | | [RUParaPhraserSTS](https://aclanthology.org/2020.ngt-1.6) (Pivovarova et al., 2017) | ['rus'] | STS | s2s | [News, Written] | None | None | | [RedditClustering.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | s2s | [Web, Social, Written] | None | None | -| [RedditClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Social, Written] | None | None | +| [RedditClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Social, Written] | {'test': 459389} | {'test': {'num_samples': 459389, 'number_of_characters': 334286895, 'min_text_length': 79, 'average_text_length': 727.68, 'max_text_length': 4359, 'min_labels_per_text': 2, 'average_labels_per_text': 1.0, 'max_labels_per_text': 77908, 'unique_labels': 440, 'labels': {'FortNiteBR': {'count': 436}, 'buildapc': {'count': 8484}, 'offmychest': {'count': 570}, 'nus': {'count': 45}, 'relationship_advice': {'count': 16651}, 'premed': {'count': 201}, 'dogecoin': {'count': 8108}, 'GamingLaptops': {'count': 183}, 'asktransgender': {'count': 326}, 'MachineLearning': {'count': 61}, 'puppy101': {'count': 1597}, 'GunAccessoriesForSale': {'count': 2619}, 'Random_Acts_Of_Amazon': {'count': 1115}, 'Catholicism': {'count': 183}, 'MonsterHunter': {'count': 218}, 'tipofmypenis': {'count': 87}, 'samsung': {'count': 69}, 'PersonalFinanceCanada': {'count': 341}, 'Dyson_Sphere_Program': {'count': 55}, 'bleach': {'count': 41}, 'AmItheAsshole': {'count': 3730}, 'WallStreetbetsELITE': {'count': 328}, 'GlobalPowers': {'count': 35}, 'ABraThatFits': {'count': 159}, 'PokemonGoFriends': {'count': 1165}, 'NoMansSkyTheGame': {'count': 259}, 'masseffect': {'count': 233}, 'dating_advice': {'count': 559}, 'yoga': {'count': 50}, 'depression': {'count': 
515}, 'COVID19positive': {'count': 180}, 'generationology': {'count': 37}, 'feedthebeast': {'count': 192}, 'EliteDangerous': {'count': 270}, 'alcoholicsanonymous': {'count': 93}, 'GoRVing': {'count': 35}, 'thedivision': {'count': 111}, 'breakingmom': {'count': 105}, 'AskAnAmerican': {'count': 80}, 'HypnoFair': {'count': 5}, 'JustUnsubbed': {'count': 13}, 'socialanxiety': {'count': 123}, 'dirtykikpals': {'count': 202}, 'askTO': {'count': 126}, 'AskCulinary': {'count': 108}, 'Bogleheads': {'count': 71}, 'dragonquest': {'count': 45}, 'NoContract': {'count': 30}, 'gorillaz': {'count': 14}, 'MondoGore': {'count': 8}, 'comicswap': {'count': 56}, 'VirtualYoutubers': {'count': 92}, 'Gta5Modding': {'count': 28}, 'obs': {'count': 61}, 'vcu': {'count': 9}, 'KingkillerChronicle': {'count': 17}, 'AmongUs': {'count': 41}, 'wireshark': {'count': 3}, 'Dodocodes': {'count': 46}, 'Aliexpress': {'count': 40}, 'LearnerDriverUK': {'count': 12}, 'PanicAttack': {'count': 23}, 'KassadinMains': {'count': 10}, 'islam': {'count': 93}, 'chronotrigger': {'count': 4}, 'skincareexchange': {'count': 13}, 'PokemonHome': {'count': 21}, 'survivinginfidelity': {'count': 71}, 'igcse': {'count': 21}, 'C25K': {'count': 21}, 'aorus': {'count': 2}, 'idleon': {'count': 19}, 'photography': {'count': 22}, 'cryptocoins': {'count': 7}, 'CanaryWharfBets': {'count': 7}, 'KillingEve': {'count': 7}, 'GameBuilderGarage': {'count': 16}, 'SauceSharingCommunity': {'count': 7}, 'turo': {'count': 9}, 'foodscience': {'count': 14}, 'HIMYM': {'count': 20}, 'HauntingOfHillHouse': {'count': 4}, 'GoodNotes': {'count': 8}, 'RedditWritesSeinfeld': {'count': 6}, 'AirReps': {'count': 2}, 'ADHD': {'count': 3811}, 'BuddyCrossing': {'count': 446}, 'libraryofruina': {'count': 98}, 'SluttyConfessions': {'count': 2787}, 'tipofmytongue': {'count': 7145}, 'fleshlight': {'count': 128}, 'amcstock': {'count': 13910}, 'teenagers': {'count': 77908}, 'suggestmeabook': {'count': 1540}, 'dirtypenpals': {'count': 5587}, 'MinecraftServer': 
{'count': 177}, 'CreditCards': {'count': 669}, 'Guitar': {'count': 10952}, 'rpg': {'count': 529}, 'NoFap': {'count': 14853}, 'lfg': {'count': 1093}, 'MarsWallStreet': {'count': 935}, 'SummonSign': {'count': 931}, 'AssassinsCreedValhala': {'count': 295}, 'hoi4': {'count': 432}, 'Coins4Sale': {'count': 260}, 'xbox': {'count': 459}, 'TooAfraidToAsk': {'count': 7404}, 'NBA2k': {'count': 553}, 'KGBTR': {'count': 943}, 'roblox': {'count': 220}, 'salesforce': {'count': 214}, 'TwoXChromosomes': {'count': 1736}, 'mechmarket': {'count': 4863}, 'Gaming_Headsets': {'count': 103}, 'pittsburgh': {'count': 189}, 'CryptoMars': {'count': 1606}, 'FridayNightFunkin': {'count': 378}, 'vaginismus': {'count': 122}, 'transpositive': {'count': 10}, 'comicbooks': {'count': 274}, 'BDSMcommunity': {'count': 185}, 'aliens': {'count': 201}, 'Scotch': {'count': 64}, 'KikRoleplay': {'count': 141}, 'Kayaking': {'count': 91}, '196': {'count': 47}, 'digimon': {'count': 140}, 'Evernote': {'count': 42}, 'logh': {'count': 22}, 'arlington': {'count': 15}, 'Adopted': {'count': 8}, 'DissonautUniverse': {'count': 4}, 'Midsommar': {'count': 12}, 'SofiawithanF': {'count': 83}, 'xmpp': {'count': 6}, 'ZombsRoyale': {'count': 16}, 'accesscontrol': {'count': 8}, 'WetlanderHumor': {'count': 2}, 'PoonamPandeyFanatics': {'count': 2}, 'screenplaychallenge': {'count': 2}, 'scatstories': {'count': 2}, 'techsupport': {'count': 290}, 'whatcarshouldIbuy': {'count': 79}, 'Stormlight_Archive': {'count': 15}, 'deadbydaylight': {'count': 126}, 'bicycling': {'count': 27}, 'oculus': {'count': 64}, 'Cartalk': {'count': 33}, 'Sims4': {'count': 43}, 'NoFeeAC': {'count': 95}, 'Crypto_com': {'count': 37}, 'ITCareerQuestions': {'count': 259}, 'aromantic': {'count': 18}, 'Revu': {'count': 3}, 'exalted': {'count': 2}, 'HilariaBaldwin': {'count': 20}, 'Testosterone': {'count': 35}, 'Screenwriting': {'count': 170}, 'LifeProTips': {'count': 49}, 'steinsgate': {'count': 13}, 'Baystreetbets': {'count': 10}, 'AskGirls': {'count': 7}, 
'idlechampions': {'count': 7}, 'facebook': {'count': 17}, 'tf2trade': {'count': 4}, 'mfdoom': {'count': 3}, 'FiddlesticksMains': {'count': 2}, 'HFY': {'count': 10}, 'FiestaST': {'count': 2}, 'whatsthatbook': {'count': 994}, 'GearsOfWar': {'count': 879}, 'KazuhaMains': {'count': 175}, 'RepTime': {'count': 211}, 'AstroGaming': {'count': 141}, 'metalgearsolid': {'count': 152}, 'qBittorrent': {'count': 39}, 'ELLIPAL_Official': {'count': 24}, 'raisedbynarcissists': {'count': 4895}, 'unpopularopinion': {'count': 14901}, 'ACTrade': {'count': 5679}, 'askcarsales': {'count': 1339}, 'AskVet': {'count': 1357}, 'whowouldwin': {'count': 4493}, 'playstation': {'count': 1362}, 'anime': {'count': 6531}, 'GME': {'count': 12577}, 'DotA2': {'count': 2004}, 'cryptostreetbets': {'count': 2241}, 'MonsterHunterWorld': {'count': 698}, 'Market76': {'count': 14274}, 'DnD': {'count': 5092}, 'leagueoflegends': {'count': 3683}, 'doordash_drivers': {'count': 1626}, 'theta_network': {'count': 489}, 'exmuslim': {'count': 1369}, 'gonewildaudio': {'count': 2998}, 'conspiracy': {'count': 3587}, 'heroesofthestorm': {'count': 535}, 'FanFiction': {'count': 2782}, 'Doom': {'count': 1251}, 'texas': {'count': 269}, 'Vent': {'count': 1738}, 'selfimprovement': {'count': 1284}, 'youtubers': {'count': 706}, 'askseddit': {'count': 237}, 'boardgames': {'count': 1237}, 'bravelydefault': {'count': 347}, 'ConquerorsBlade': {'count': 238}, 'ChronicPain': {'count': 527}, 'teenagersnew': {'count': 256}, 'brasil': {'count': 1092}, 'MatthiasSubmissions': {'count': 921}, 'MarylandUnemployment': {'count': 314}, 'SaltLakeCity': {'count': 411}, 'BokunoheroFanfiction': {'count': 155}, 'BenignExistence': {'count': 125}, 'GayYoungOldDating': {'count': 156}, 'Bible': {'count': 202}, 'haskell': {'count': 154}, 'seduction': {'count': 400}, 'fantasywriters': {'count': 262}, 'HiveOS': {'count': 100}, 'PerkByDaylight': {'count': 15}, 'Hedgehog': {'count': 73}, 'xmen': {'count': 263}, 'HyperRP': {'count': 122}, 'emotestories': 
{'count': 3}, 'tutanota': {'count': 135}, 'CultoftheFranklin': {'count': 46}, 'langrisser': {'count': 62}, 'CozyGrove': {'count': 61}, 'Sverigesforsvarsmakt': {'count': 12}, 'silverbugbets': {'count': 21}, 'WreckingBallMains': {'count': 5}, 'capitalism_in_decay': {'count': 8}, 'paintdotnet': {'count': 11}, 'u_mawadom118': {'count': 4}, 'xboxfindfriends': {'count': 2}, 'CPTSD': {'count': 540}, 'destiny2': {'count': 318}, 'Wallstreetsilver': {'count': 1013}, 'DestinyTheGame': {'count': 1107}, 'blackopscoldwar': {'count': 400}, 'InstacartShoppers': {'count': 202}, 'RocketLeagueExchange': {'count': 832}, 'apexlegends': {'count': 3265}, 'kansascity': {'count': 53}, 'namenerds': {'count': 235}, 'help': {'count': 152}, 'Kengan_Ashura': {'count': 132}, 'thetagang': {'count': 165}, 'GameSale': {'count': 262}, 'Reduction': {'count': 109}, 'sex': {'count': 906}, 'bostonr4r': {'count': 75}, 'LegendsOfRuneterra': {'count': 231}, 'overlord': {'count': 48}, 'madisonwi': {'count': 53}, 'steelseries': {'count': 79}, 'ClashOfClansRecruit': {'count': 214}, 'CharacterRant': {'count': 55}, 'AirForce': {'count': 94}, 'sexstories': {'count': 92}, 'NameThatSong': {'count': 162}, 'depressed': {'count': 74}, 'ibs': {'count': 150}, '40kLore': {'count': 269}, 'podcasts': {'count': 88}, 'miraculousladybug': {'count': 150}, 'ask': {'count': 224}, 'EverMerge': {'count': 31}, 'TMJ': {'count': 54}, 'BitLifeApp': {'count': 39}, 'FireEmblemHeroes': {'count': 100}, 'software': {'count': 62}, 'ShieldAndroidTV': {'count': 70}, 'GriefSupport': {'count': 125}, 'onewheel': {'count': 37}, 'MensRights': {'count': 80}, 'nhl': {'count': 22}, 'ClashOfClans': {'count': 107}, 'ps3homebrew': {'count': 33}, 'LightNovels': {'count': 77}, 'redsox': {'count': 34}, 'CryptoMarkets': {'count': 44}, 'ugly': {'count': 47}, 'GCXRep': {'count': 12}, 'cscareerquestionsEU': {'count': 65}, 'MindHunter': {'count': 6}, 'starcraft2coop': {'count': 15}, 'nanocurrency': {'count': 1421}, 'ModelCars': {'count': 8}, 'UKJobs': 
{'count': 30}, 'Netherlands': {'count': 44}, 'clonewars': {'count': 8}, 'Julia': {'count': 11}, 'Prolactinoma': {'count': 9}, 'sofi': {'count': 11}, 'royalfamily': {'count': 6}, 'ConnecticutR4R': {'count': 8}, 'weather': {'count': 5}, 'oneui': {'count': 7}, 'KTM': {'count': 5}, 'Aerials': {'count': 3}, 'seoul': {'count': 2}, 'exjw': {'count': 3281}, 'ModernMagic': {'count': 699}, 'Paladins': {'count': 1242}, 'kdramarecommends': {'count': 1611}, 'hitbtc': {'count': 330}, 'endocrinology': {'count': 75}, 'Bath': {'count': 43}, 'NassauCountyHookups': {'count': 5}, 'feminineboys': {'count': 1248}, 'dreamsmp': {'count': 2018}, 'SquaredCircle': {'count': 2255}, 'Minecraft': {'count': 8753}, 'spirituality': {'count': 1809}, 'Eldenring': {'count': 1471}, 'Sat': {'count': 1172}, 'bonnaroo': {'count': 194}, 'gardening': {'count': 1892}, 'Unemployment': {'count': 6185}, 'mac': {'count': 1847}, 'Bestbuy': {'count': 437}, 'quittingkratom': {'count': 1081}, 'lawschooladmissions': {'count': 3436}, 'NiceHash': {'count': 2135}, 'McMaster': {'count': 815}, 'covidlonghaulers': {'count': 1299}, 'stalker': {'count': 758}, 'MLBTheShow': {'count': 2721}, 'FortniteCompetitive': {'count': 998}, 'dpdr': {'count': 514}, 'appliancerepair': {'count': 720}, 'thomasthetankengine': {'count': 207}, 'delhi': {'count': 217}, 'Huel': {'count': 300}, 'leafs': {'count': 203}, 'HotWheels': {'count': 170}, '90dayfianceuncensored': {'count': 550}, 'Throwers': {'count': 142}, 'Wavyhair': {'count': 270}, 'CryptoHorde': {'count': 128}, 'ShuumatsuNoValkyrie': {'count': 453}, 'TeensMeetTeens': {'count': 432}, 'dbrand': {'count': 108}, 'SLFmeetups': {'count': 18}, '1200isplentyketo': {'count': 48}, 'passive_income': {'count': 211}, 'BroadCity': {'count': 16}, 'RevenantMain': {'count': 71}, 'extrarfl': {'count': 25}, 'AgonGame': {'count': 5}, 'FitnessDE': {'count': 3}, 'gaming': {'count': 1277}, 'livesound': {'count': 91}, 'IBO': {'count': 1896}, 'EscapefromTarkov': {'count': 1300}, 'amex': {'count': 145}, 
'DMAcademy': {'count': 1411}, 'VinylCollectors': {'count': 556}, 'cardano': {'count': 716}, 'brave_browser': {'count': 159}, 'dating': {'count': 952}, 'OculusQuest': {'count': 942}, 'Superstonk': {'count': 3089}, 'MtF': {'count': 957}, 'findaleague': {'count': 207}, 'Nioh': {'count': 398}, 'IRS': {'count': 715}, 'transgendercirclejerk': {'count': 353}, 'learnmath': {'count': 489}, 'piano': {'count': 263}, 'LeagueConnect': {'count': 216}, 'eu4': {'count': 561}, 'Wordpress': {'count': 345}, 'RoleplayingForReddit': {'count': 31}, 'LOONA': {'count': 89}, 'newtothenavy': {'count': 167}, 'HaircareScience': {'count': 118}, 'appletv': {'count': 167}, 'sissypersonals': {'count': 102}, 'raleigh': {'count': 168}, 'realonlyfansreviews': {'count': 21}, 'AskGames': {'count': 49}, 'PokemonTCG': {'count': 325}, 'controlgame': {'count': 109}, 'GoogleDataStudio': {'count': 16}, 'WhiteWolfRPG': {'count': 139}, 'MECoOp': {'count': 31}, 'snuffrp': {'count': 46}, 'lockpicking': {'count': 103}, 'wicked_edge': {'count': 105}, 'BMW': {'count': 99}, 'choiceofgames': {'count': 24}, 'hisdarkmaterials': {'count': 12}, 'SakuraGakuin': {'count': 24}, 'detrans': {'count': 55}, 'Smallville': {'count': 37}, 'kingofqueens': {'count': 7}, 'JamesHoffmann': {'count': 22}, 'stashinvest': {'count': 16}, 'ABA': {'count': 79}, 'ladybusiness': {'count': 10}, 'gamegrumps': {'count': 32}, 'GodEater': {'count': 21}, 'tomorrow': {'count': 39}, 'Tomorrowland': {'count': 9}, 'BlackCountryNewRoad': {'count': 5}, 'STAYC': {'count': 3}, 'SatoshiStreetBets': {'count': 3828}, 'AskLosAngeles': {'count': 1036}, 'buildapcforme': {'count': 1689}, 'ApplyingToCollege': {'count': 10675}, 'watercooling': {'count': 1209}, 'BreakUps': {'count': 4914}, 'FIFA': {'count': 3811}, 'emacs': {'count': 712}, 'trakstocks': {'count': 691}, 'Shittyaskflying': {'count': 147}, 'AmazonFC': {'count': 1178}, 'stocks': {'count': 4610}, 'BangaloreMains': {'count': 26}, 'pokemon': {'count': 3953}, 'religion': {'count': 684}, 'cuboulder': 
{'count': 269}, 'self': {'count': 1688}, 'tarot': {'count': 912}, 'turtles': {'count': 49}, 'TheMagnusArchives': {'count': 300}, 'Superhero_Ideas': {'count': 34}, 'NTU': {'count': 308}, 'touhou': {'count': 623}, 'JoJolion': {'count': 50}, 'lasers': {'count': 27}, 'popperpigs': {'count': 67}, 'aggretsuko': {'count': 20}, 'Library': {'count': 5}}}} | | [RestaurantReviewSentimentClassification](https://link.springer.com/chapter/10.1007/978-3-319-18117-2_2) (ElSahar et al., 2015) | ['ara'] | Classification | s2s | [Reviews, Written] | None | None | | [RiaNewsRetrieval](https://arxiv.org/abs/1901.07786) (Gavrilov et al., 2019) | ['rus'] | Retrieval | s2p | [News, Written] | None | None | | [RiaNewsRetrievalHardNegatives](https://arxiv.org/abs/1901.07786) (Gavrilov et al., 2019) | ['rus'] | Retrieval | s2p | [News, Written] | None | None | @@ -438,7 +438,7 @@ The following tables give you an overview of the tasks in MTEB. | [RuReviewsClassification](https://github.com/sismetanin/rureviews) (Sergey Smetanin, 2019) | ['rus'] | Classification | p2p | [Reviews, Written] | None | None | | [RuSTSBenchmarkSTS](https://github.com/PhilipMay/stsb-multi-mt/) (Philip May, 2021) | ['rus'] | STS | s2s | [News, Social, Web, Written] | None | None | | [RuSciBenchGRNTIClassification](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Classification | p2p | [Academic, Written] | None | None | -| [RuSciBenchGRNTIClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'average_text_length': 889.81, 'average_labels_per_text': 1.0, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 
73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} | +| [RuSciBenchGRNTIClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'min_text_length': 84, 'average_text_length': 889.81, 'max_text_length': 3143, 'min_labels_per_text': 73, 'average_labels_per_text': 1.0, 'max_labels_per_text': 74, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} | | [RuSciBenchOECDClassification](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Classification | p2p | [Academic, Written] | None | None | | [RuSciBenchOECDClusteringP2P](https://github.com/mlsa-iai-msu-lab/ru_sci_bench/) | ['rus'] | Clustering | p2p | [Academic, Written] | None | None | | [SCDBPAccountabilityLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -467,12 +467,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [SNLHierarchicalClusteringS2S](https://huggingface.co/datasets/navjordj/SNL_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | s2s | [Encyclopaedic, Non-fiction, Written] | None | None | | [SNLRetrieval](https://huggingface.co/datasets/navjordj/SNL_summarization) (Navjord et al., 2023) | ['nob'] | Retrieval | p2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [SRNCorpusBitextMining](https://arxiv.org/abs/2212.06383) (Zwennicker et al., 2022) | ['nld', 'srn'] | BitextMining | s2s | [Social, Web, Written] | None | None | -| [STS12](https://www.aclweb.org/anthology/S12-1051.pdf) (Agirre et al., 2012) | ['eng'] | STS | s2s | [Encyclopaedic, News, Written] | {'test': 3108} | {'test': {'num_samples': 3108, 'number_of_characters': 402118, 'average_sentence1_len': 63.79, 'average_sentence2_len': 65.59, 'avg_score': 3.51}} | +| [STS12](https://www.aclweb.org/anthology/S12-1051.pdf) (Agirre et al., 2012) | ['eng'] | STS | s2s | [Encyclopaedic, News, Written] | {'test': 3108} | {'test': {'num_samples': 3108, 'number_of_characters': 402118, 'min_sentence1_length': 3, 'average_sentence1_len': 63.79, 'max_sentence1_length': 220, 'unique_sentence1': 2236, 'min_sentence2_length': 7, 'average_sentence2_len': 65.59, 'max_sentence2_length': 204, 'unique_sentence2': 2797, 'min_score': 0.0, 'avg_score': 3.51, 'max_score': 5.0}} | | [STS13](https://www.aclweb.org/anthology/S13-1004/) (Eneko Agirre, 2013) | ['eng'] | STS | s2s | [Web, News, Non-fiction, Written] | None | None | | [STS14](https://www.aclweb.org/anthology/S14-1002) | ['eng'] | STS | s2s | [Blog, Web, Spoken] | None | None | | [STS15](https://www.aclweb.org/anthology/S15-2010) | ['eng'] | STS | s2s | [Blog, News, Web, Written, Spoken] | None | None | | [STS16](https://www.aclweb.org/anthology/S16-1001) | ['eng'] | STS | s2s | [Blog, Web, Spoken] | None | None | -| [STS17](https://alt.qcri.org/semeval2017/task1/) | ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] | STS | s2s | [News, 
Web, Written] | {'test': 5346} | {'test': {'num_samples': 5346, 'number_of_characters': 400264, 'average_sentence1_len': 38.15, 'average_sentence2_len': 36.73, 'avg_score': 2.36, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'average_sentence1_len': 31.99, 'average_sentence2_len': 32.44, 'avg_score': 2.47}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'average_sentence1_len': 32.21, 'average_sentence2_len': 32.78, 'avg_score': 2.22}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'average_sentence1_len': 42.36, 'average_sentence2_len': 32.7, 'avg_score': 2.14}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'average_sentence1_len': 43.95, 'average_sentence2_len': 44.76, 'avg_score': 2.28}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'average_sentence1_len': 43.95, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'average_sentence1_len': 41.92, 'average_sentence2_len': 41.6, 'avg_score': 2.13}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'average_sentence1_len': 50.84, 'average_sentence2_len': 42.02, 'avg_score': 2.15}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'average_sentence1_len': 49.84, 'average_sentence2_len': 51.22, 'avg_score': 2.23}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'average_sentence1_len': 49.62, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'average_sentence1_len': 50.03, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'average_sentence1_len': 46.82, 'average_sentence2_len': 42.72, 'avg_score': 2.28}}}} | +| [STS17](https://alt.qcri.org/semeval2017/task1/) | ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] | STS | s2s | [News, Web, Written] | {'test': 5346} | {'test': 
{'num_samples': 5346, 'number_of_characters': 400264, 'min_sentence1_length': 6, 'average_sentence1_len': 38.15, 'max_sentence1_length': 976, 'unique_sentence1': 4900, 'min_sentence2_length': 6, 'average_sentence2_len': 36.73, 'max_sentence2_length': 1007, 'unique_sentence2': 4470, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'min_sentence1_length': 6, 'average_sentence1_len': 31.99, 'max_sentence1_length': 976, 'unique_sentence1': 2650, 'min_sentence2_length': 6, 'average_sentence2_len': 32.44, 'max_sentence2_length': 1007, 'unique_sentence2': 2720, 'min_score': 0.0, 'avg_score': 2.47, 'max_score': 5.0}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'min_sentence1_length': 11, 'average_sentence1_len': 32.21, 'max_sentence1_length': 99, 'unique_sentence1': 250, 'min_sentence2_length': 9, 'average_sentence2_len': 32.78, 'max_sentence2_length': 83, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.22, 'max_score': 5.0}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'min_sentence1_length': 13, 'average_sentence1_len': 42.36, 'max_sentence1_length': 105, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 32.7, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.14, 'max_score': 5.0}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 44.76, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 
'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'min_sentence1_length': 15, 'average_sentence1_len': 41.92, 'max_sentence1_length': 101, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 41.6, 'max_sentence2_length': 107, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.13, 'max_score': 5.0}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'min_sentence1_length': 12, 'average_sentence1_len': 50.84, 'max_sentence1_length': 160, 'unique_sentence1': 250, 'min_sentence2_length': 14, 'average_sentence2_len': 42.02, 'max_sentence2_length': 117, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.15, 'max_score': 5.0}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'min_sentence1_length': 18, 'average_sentence1_len': 49.84, 'max_sentence1_length': 136, 'unique_sentence1': 250, 'min_sentence2_length': 13, 'average_sentence2_len': 51.22, 'max_sentence2_length': 129, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.23, 'max_score': 5.0}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'min_sentence1_length': 19, 'average_sentence1_len': 49.62, 'max_sentence1_length': 115, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'min_sentence1_length': 15, 'average_sentence1_len': 50.03, 'max_sentence1_length': 113, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'min_sentence1_length': 14, 'average_sentence1_len': 46.82, 'max_sentence1_length': 123, 
'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}}}} | | [STS22.v2](https://competitions.codalab.org/competitions/33835) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'pol', 'rus', 'spa', 'tur'] | STS | p2p | [News, Written] | None | None | | [STSB](https://aclanthology.org/2021.emnlp-main.357) (Shitao Xiao, 2024) | ['cmn'] | STS | s2s | | None | None | | [STSBenchmark](https://github.com/PhilipMay/stsb-multi-mt/) (Philip May, 2021) | ['eng'] | STS | s2s | | None | None | @@ -490,7 +490,7 @@ The following tables give you an overview of the tasks in MTEB. | [SinhalaNewsClassification](https://huggingface.co/datasets/NLPC-UOM/Sinhala-News-Category-classification) (Nisansa de Silva, 2015) | ['sin'] | Classification | s2s | [News, Written] | None | None | | [SinhalaNewsSourceClassification](https://huggingface.co/datasets/NLPC-UOM/Sinhala-News-Source-classification) (Dhananjaya et al., 2022) | ['sin'] | Classification | s2s | [News, Written] | None | None | | [SiswatiNewsClassification](https://huggingface.co/datasets/dsfsi/za-isizulu-siswati-news) (Madodonga et al., 2023) | ['ssw'] | Classification | s2s | [News, Written] | None | None | -| [SlovakHateSpeechClassification](https://huggingface.co/datasets/TUKE-KEMT/hate_speech_slovak) | ['slk'] | Classification | s2s | [Social, Written] | {'test': 1319} | {'test': {'num_samples': 1319, 'number_of_characters': 122279, 'average_text_length': 92.71, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}} | +| [SlovakHateSpeechClassification](https://huggingface.co/datasets/TUKE-KEMT/hate_speech_slovak) | ['slk'] | Classification | s2s | [Social, Written] | {'test': 1319, 'train': 11870} | {'test': {'num_samples': 1319, 'number_of_characters': 122279, 'num_texts_in_train': 46, 'min_text_length': 8, 'average_text_length': 92.71, 'max_text_length': 1584, 
'unique_text': 1315, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}, 'train': {'num_samples': 11870, 'number_of_characters': 1130860, 'num_texts_in_train': None, 'min_text_length': 7, 'average_text_length': 95.27, 'max_text_length': 2112, 'unique_text': 11655, 'unique_labels': 2, 'labels': {'1': {'count': 3245}, '0': {'count': 8625}}}} | | [SlovakMovieReviewSentimentClassification](https://arxiv.org/pdf/2304.01922) ({ {S, 2023) | ['svk'] | Classification | s2s | [Reviews, Written] | None | None | | [SlovakSumRetrieval](https://huggingface.co/datasets/NaiveNeuron/slovaksum) | ['slk'] | Retrieval | s2s | [News, Social, Web, Written] | None | None | | [SouthAfricanLangClassification](https://www.kaggle.com/competitions/south-african-language-identification/) (ExploreAI Academy et al., 2022) | ['afr', 'eng', 'nbl', 'nso', 'sot', 'ssw', 'tsn', 'tso', 'ven', 'xho', 'zul'] | Classification | s2s | [Web, Non-fiction, Written] | None | None | @@ -504,7 +504,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [StackExchangeClustering.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | s2s | [Web, Written] | None | None | | [StackExchangeClusteringP2P.v2](https://arxiv.org/abs/2104.07081) (Gregor Geigle, 2021) | ['eng'] | Clustering | p2p | [Web, Written] | None | None | | [StackOverflowDupQuestions](https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf) (Xueqing Liu, 2018) | ['eng'] | Reranking | s2s | | None | None | -| [StackOverflowQA](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 21925} | {'test': {'number_of_characters': 2506.11, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'average_document_length': 0.06, 'average_query_length': 0.65, 'average_relevant_docs_per_query': 1.0}} | +| [StackOverflowQA](https://arxiv.org/abs/2407.02883) (Xiangyang Li, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 21925} | {'test': {'number_of_characters': 26584028, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'min_document_length': 61, 'average_document_length': 130.32, 'max_document_length': 22234, 'unique_documents': 19931, 'min_query_length': 5, 'average_query_length': 12029.38, 'max_query_length': 46028, 'unique_queries': 1994, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1994}} | | [StatcanDialogueDatasetRetrieval](https://mcgill-nlp.github.io/statcan-dialogue-dataset/) | ['eng', 'fra'] | Retrieval | s2p | [Government, Web, Written] | None | None | | [SummEvalFrSummarization.v2](https://github.com/Yale-LILY/SummEval) (Fabbri et al., 2020) | ['fra'] | Summarization | p2p | [News, Written] | None | None | | [SummEvalSummarization.v2](https://github.com/Yale-LILY/SummEval) (Fabbri et al., 2020) | ['eng'] | Summarization | p2p | [News, Written] | None | None | @@ -518,7 +518,7 @@ The following tables 
give you an overview of the tasks in MTEB. | [SwissJudgementClassification](https://aclanthology.org/2021.nllp-1.3/) (Joel Niklaus, 2022) | ['deu', 'fra', 'ita'] | Classification | s2s | [Legal, Written] | None | None | | [SyntecReranking](https://huggingface.co/datasets/lyon-nlp/mteb-fr-reranking-syntec-s2p) (Mathieu Ciancone, 2024) | ['fra'] | Reranking | s2p | [Legal, Written] | None | None | | [SyntecRetrieval](https://huggingface.co/datasets/lyon-nlp/mteb-fr-retrieval-syntec-s2p) (Mathieu Ciancone, 2024) | ['fra'] | Retrieval | s2p | [Legal, Written] | None | None | -| [SyntheticText2SQL](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) (Meyer et al., 2024) | ['eng', 'sql'] | Retrieval | p2p | [Programming, Written] | {'test': 111702} | {'test': {'number_of_characters': 210.98, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0}} | +| [SyntheticText2SQL](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) (Meyer et al., 2024) | ['eng', 'sql'] | Retrieval | p2p | [Programming, Written] | {'test': 111702} | {'test': {'number_of_characters': 14041553, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'min_document_length': 13, 'average_document_length': 4.58, 'max_document_length': 281, 'unique_documents': 105851, 'min_query_length': 17, 'average_query_length': 2316.95, 'max_query_length': 762, 'unique_queries': 5851, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 5851}} | | [T2Reranking](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Reranking | s2s | | None | None | | [T2Retrieval](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Retrieval | s2p | | None | None | | [TERRa](https://arxiv.org/pdf/2010.15925) (Shavrina et al., 2020) | ['rus'] | PairClassification | s2s | [News, Web, 
Written] | None | None | @@ -528,7 +528,7 @@ The following tables give you an overview of the tasks in MTEB. | [TV2Nordretrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization) | ['dan'] | Retrieval | p2p | [News, Non-fiction, Written] | None | None | | [TamilNewsClassification](https://github.com/vanangamudi/tamil-news-classification) (Anoop Kunchukuttan, 2020) | ['tam'] | Classification | s2s | [News, Written] | None | None | | [Tatoeba](https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1) (Tatoeba community, 2021) | ['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] | BitextMining | s2s | [Written] | None | None | -| [TbilisiCityHallBitextMining](https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles) | ['eng', 'kat'] | BitextMining | s2s | [News, Written] | None | None | +| [TbilisiCityHallBitextMining](https://huggingface.co/datasets/jupyterjazz/tbilisi-city-hall-titles) | ['eng', 'kat'] | BitextMining | s2s | [News, Written] | {'test': 3640} | {'test': {'num_samples': 3640, 'number_of_characters': 572146, 'unique_pairs': 3640, 'min_sentence1_length': 13, 'average_sentence1_length': 78.59, 'max_sentence1_length': 203, 'unique_sentence1': 3636, 'min_sentence2_length': 13, 
'average_sentence2_length': 78.59, 'max_sentence2_length': 203, 'unique_sentence2': 3636, 'hf_subset_descriptive_stats': {'kat_Geor-eng_Latn': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 30, 'average_sentence1_length': 76.07, 'max_sentence1_length': 189, 'unique_sentence1': 1820, 'min_sentence2_length': 13, 'average_sentence2_length': 81.12, 'max_sentence2_length': 203, 'unique_sentence2': 1816}, 'eng_Latn-kat_Geor': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 13, 'average_sentence1_length': 81.12, 'max_sentence1_length': 203, 'unique_sentence1': 1816, 'min_sentence2_length': 30, 'average_sentence2_length': 76.07, 'max_sentence2_length': 189, 'unique_sentence2': 1820}}}} | | [TelemarketingSalesRuleLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [TeluguAndhraJyotiNewsClassification](https://github.com/AnushaMotamarri/Telugu-Newspaper-Article-Dataset) | ['tel'] | Classification | s2s | [News, Written] | None | None | | [TempReasonL1](https://github.com/DAMO-NLP-SG/TempReason) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | @@ -547,7 +547,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [ThuNewsClusteringS2S.v2](http://thuctc.thunlp.org/) (Sun et al., 2016) | ['cmn'] | Clustering | s2s | [News, Written] | None | None | | [TopiOCQA](https://mcgill-nlp.github.io/topiocqa) (Vaibhav Adlakha, 2022) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [TopiOCQAHardNegatives](https://mcgill-nlp.github.io/topiocqa) (Vaibhav Adlakha, 2022) | ['eng'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | -| [Touche2020Retrieval.v3](https://github.com/castorini/touche-error-analysis) | ['eng'] | Retrieval | s2p | [Academic] | {'test': 303781} | {'test': {'number_of_characters': 2140.82, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'average_document_length': 0.01, 'average_query_length': 0.89, 'average_relevant_docs_per_query': 34.94}} | +| [Touche2020Retrieval.v3](https://github.com/castorini/touche-error-analysis) | ['eng'] | Retrieval | s2p | [Academic] | {'test': 303781} | {'test': {'number_of_characters': 637047138, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'min_document_length': 16, 'average_document_length': 0.01, 'max_document_length': 83, 'unique_documents': 303732, 'min_query_length': 41, 'average_query_length': 13000918.57, 'max_query_length': 105983, 'unique_queries': 49, 'min_relevant_docs_per_query': 40, 'average_relevant_docs_per_query': 58.14, 'max_relevant_docs_per_query': 87, 'unique_relevant_docs': 2732}} | | [ToxicChatClassification](https://aclanthology.org/2023.findings-emnlp.311/) (Zi Lin, 2023) | ['eng'] | Classification | s2s | [Constructed, Written] | None | None | | [ToxicConversationsClassification](https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview) (cjadams, 2019) | ['eng'] | Classification | s2s | [Social, Written] | None | None | | [TswanaNewsClassification](https://link.springer.com/chapter/10.1007/978-3-031-49002-6_17) (Vukosi Marivate, 2023) | ['tsn'] | Classification | s2s | [News, Written] | None | None | 
@@ -560,10 +560,10 @@ The following tables give you an overview of the tasks in MTEB. | [TweetSentimentClassification](https://aclanthology.org/2022.lrec-1.27) | ['ara', 'deu', 'eng', 'fra', 'hin', 'ita', 'por', 'spa'] | Classification | s2s | [Social, Written] | None | None | | [TweetSentimentExtractionClassification](https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview) (Maggie et al., 2020) | ['eng'] | Classification | s2s | [Social, Written] | None | None | | [TweetTopicSingleClassification](https://arxiv.org/abs/2209.09824) | ['eng'] | Classification | s2s | [Social, News, Written] | None | None | -| [TwentyNewsgroupsClustering.v2](https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html) (Ken Lang, 1995) | ['eng'] | Clustering | s2s | [News, Written] | None | None | +| [TwentyNewsgroupsClustering.v2](https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html) (Ken Lang, 1995) | ['eng'] | Clustering | s2s | [News, Written] | {'test': 59545} | {'test': {'num_samples': 59545, 'number_of_characters': 1907719, 'min_text_length': 11, 'average_text_length': 32.04, 'max_text_length': 120, 'min_labels_per_text': 2082, 'average_labels_per_text': 1.0, 'max_labels_per_text': 3236, 'unique_labels': 20, 'labels': {'12': {'count': 3137}, '6': {'count': 3070}, '0': {'count': 2613}, '2': {'count': 3155}, '10': {'count': 3220}, '17': {'count': 2986}, '14': {'count': 3106}, '13': {'count': 3055}, '1': {'count': 3056}, '16': {'count': 2911}, '9': {'count': 2984}, '3': {'count': 3070}, '15': {'count': 3090}, '7': {'count': 3036}, '5': {'count': 3124}, '11': {'count': 3236}, '18': {'count': 2483}, '8': {'count': 3090}, '19': {'count': 2082}, '4': {'count': 3041}}}} | | [TwitterHjerneRetrieval](https://huggingface.co/datasets/sorenmulli/da-hashtag-twitterhjerne) (Holm et al., 2024) | ['dan'] | Retrieval | p2p | [Social, Written] | None | None | | [TwitterSemEval2015](https://alt.qcri.org/semeval2015/task1/) | ['eng'] | PairClassification | s2s | | None | 
None | -| [TwitterURLCorpus](https://languagenet.github.io/) | ['eng'] | PairClassification | s2s | | {'test': 51534} | {'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'avg_sentence1_len': 79.49, 'avg_sentence2_len': 88.55, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} | +| [TwitterURLCorpus](https://languagenet.github.io/) | ['eng'] | PairClassification | s2s | | {'test': 51534} | {'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'min_sentence1_length': 24, 'avg_sentence1_length': 79.49, 'max_sentence1_length': 126, 'unique_sentence1': 4329, 'min_sentence2_length': 6, 'avg_sentence2_length': 88.55, 'max_sentence2_length': 608, 'unique_sentence2': 41304, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} | | [UCCVCommonLawLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [UkrFormalityClassification](https://huggingface.co/datasets/ukr-detect/ukr-formality-dataset-translated-gyafc) | ['ukr'] | Classification | s2s | [News, Written] | None | None | | [UnfairTOSLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | @@ -571,7 +571,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [VGHierarchicalClusteringP2P](https://huggingface.co/datasets/navjordj/VG_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | p2p | [News, Non-fiction, Written] | None | None | | [VGHierarchicalClusteringS2S](https://huggingface.co/datasets/navjordj/VG_summarization) (Navjord et al., 2023) | ['nob'] | Clustering | p2p | [News, Non-fiction, Written] | None | None | | [VideoRetrieval](https://arxiv.org/abs/2203.03367) | ['cmn'] | Retrieval | s2p | | None | None | -| [VieMedEVBitextMining](https://aclanthology.org/2015.iwslt-evaluation.11/) (Nhu Vo, 2024) | ['eng', 'vie'] | BitextMining | s2s | [Medical, Written] | None | None | +| [VieMedEVBitextMining](https://aclanthology.org/2015.iwslt-evaluation.11/) (Nhu Vo, 2024) | ['eng', 'vie'] | BitextMining | s2s | [Medical, Written] | {'test': 2048} | {'test': {'num_samples': 2048, 'number_of_characters': 575910, 'unique_pairs': 2048, 'min_sentence1_length': 11, 'average_sentence1_length': 139.23, 'max_sentence1_length': 1291, 'unique_sentence1': 2048, 'min_sentence2_length': 11, 'average_sentence2_length': 141.98, 'max_sentence2_length': 1217, 'unique_sentence2': 2047}} | | [VieQuADRetrieval](https://aclanthology.org/2020.coling-main.233.pdf) | ['vie'] | Retrieval | s2p | [Encyclopaedic, Non-fiction, Written] | None | None | | [VieStudentFeedbackClassification](https://ieeexplore.ieee.org/document/8573337) (Nguyen et al., 2018) | ['vie'] | Classification | s2s | [Reviews, Written] | None | None | | [VoyageMMarcoReranking](https://arxiv.org/abs/2312.16144) (Benjamin Clavié, 2023) | ['jpn'] | Reranking | s2s | [Academic, Non-fiction, Written] | None | None | @@ -580,12 +580,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [WebLINXCandidatesReranking](https://mcgill-nlp.github.io/weblinx) (Xing Han Lù, 2024) | ['eng'] | Reranking | p2p | [Academic, Web, Written] | None | None | | [WikiCitiesClustering](https://huggingface.co/datasets/wikipedia) | ['eng'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | | [WikiClusteringP2P.v2](https://github.com/Rysias/wiki-clustering) | ['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | -| [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'avg_query_len': 59.09, 'avg_positive_len': 385.45, 'avg_negative_len': 381.24, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 60.83, 'avg_positive_len': 375.89, 'avg_negative_len': 374.19}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 47.27, 'avg_positive_len': 394.59, 'avg_negative_len': 393.98}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.27, 'avg_positive_len': 383.84, 'avg_negative_len': 368.25}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.75, 'avg_positive_len': 351.68, 'avg_negative_len': 344.46}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 70.0, 'avg_positive_len': 391.54, 'avg_negative_len': 399.27}, 'en': 
{'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 68.37, 'avg_positive_len': 451.73, 'avg_negative_len': 453.14}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 48.67, 'avg_positive_len': 347.7, 'avg_negative_len': 344.84}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.34, 'avg_positive_len': 394.71, 'avg_negative_len': 377.84}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 50.78, 'avg_positive_len': 420.38, 'avg_negative_len': 409.52}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 70.05, 'avg_positive_len': 396.97, 'avg_negative_len': 393.33}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.34, 'avg_positive_len': 380.79, 'avg_negative_len': 375.03}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.12, 'avg_positive_len': 404.02, 'avg_negative_len': 397.55}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 61.97, 'avg_positive_len': 346.71, 'avg_negative_len': 348.59}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.67, 'avg_positive_len': 386.35, 'avg_negative_len': 384.06}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.29, 'avg_positive_len': 367.72, 'avg_negative_len': 366.84}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 57.73, 
'avg_positive_len': 372.59, 'avg_negative_len': 368.94}}}} | +| [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'min_query_length': 7, 'avg_query_length': 59.09, 'max_query_length': 180, 'unique_query': 23997, 'min_positive_length': 100, 'avg_positive_length': 385.45, 'max_positive_length': 3515, 'unique_positive': 23993, 'min_negative_length': 100, 'avg_negative_length': 381.24, 'max_negative_length': 9461, 'unique_negative': 191783, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 60.83, 'max_query_length': 166, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 375.89, 'max_positive_length': 2241, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 374.19, 'max_negative_length': 4869, 'unique_negative': 11996}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 7, 'avg_query_length': 47.27, 'max_query_length': 123, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.59, 'max_positive_length': 2338, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 393.98, 'max_negative_length': 5104, 'unique_negative': 11996}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.27, 'max_query_length': 137, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 383.84, 'max_positive_length': 2300, 
'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 368.25, 'max_negative_length': 3487, 'unique_negative': 11982}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.75, 'max_query_length': 137, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 351.68, 'max_positive_length': 2159, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.46, 'max_negative_length': 2563, 'unique_negative': 11972}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 20, 'avg_query_length': 70.0, 'max_query_length': 180, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 391.54, 'max_positive_length': 2674, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 399.27, 'max_negative_length': 3083, 'unique_negative': 12000}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 68.37, 'max_query_length': 162, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 451.73, 'max_positive_length': 3515, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 453.14, 'max_negative_length': 3662, 'unique_negative': 12000}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 12, 'avg_query_length': 48.67, 'max_query_length': 119, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 347.7, 'max_positive_length': 2571, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.84, 'max_negative_length': 4707, 'unique_negative': 11978}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 
'avg_query_length': 55.34, 'max_query_length': 132, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.71, 'max_positive_length': 2129, 'unique_positive': 1498, 'min_negative_length': 100, 'avg_negative_length': 377.84, 'max_negative_length': 2574, 'unique_negative': 11972}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 13, 'avg_query_length': 50.78, 'max_query_length': 125, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 420.38, 'max_positive_length': 2361, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 409.52, 'max_negative_length': 5912, 'unique_negative': 11996}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 23, 'avg_query_length': 70.05, 'max_query_length': 156, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 396.97, 'max_positive_length': 2082, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 393.33, 'max_negative_length': 9461, 'unique_negative': 11993}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.34, 'max_query_length': 136, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 380.79, 'max_positive_length': 1864, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 375.03, 'max_negative_length': 3641, 'unique_negative': 11985}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.12, 'max_query_length': 176, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 404.02, 'max_positive_length': 3057, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 397.55, 'max_negative_length': 
2877, 'unique_negative': 11991}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 61.97, 'max_query_length': 169, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 346.71, 'max_positive_length': 1917, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 348.59, 'max_negative_length': 4213, 'unique_negative': 11971}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 15, 'avg_query_length': 55.67, 'max_query_length': 146, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 386.35, 'max_positive_length': 2421, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 384.06, 'max_negative_length': 3668, 'unique_negative': 11974}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.29, 'max_query_length': 129, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 367.72, 'max_positive_length': 1450, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 366.84, 'max_negative_length': 2841, 'unique_negative': 11996}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 57.73, 'max_query_length': 133, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 372.59, 'max_positive_length': 2493, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 368.94, 'max_negative_length': 3680, 'unique_negative': 11999}}}} | | [WikipediaRetrievalMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-retrieval-multilingual-queries) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 
'swe'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | | [WinoGrande](https://winogrande.allenai.org/) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | | [WisesightSentimentClassification](https://github.com/PyThaiNLP/wisesight-sentiment) | ['tha'] | Classification | s2s | [Social, News, Written] | None | None | | XMarket (Bonab et al., 2021) | ['deu', 'eng', 'spa'] | Retrieval | s2p | | None | None | -| [XNLI](https://aclanthology.org/D18-1269/) (Conneau et al., 2018) | ['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | {'test': 19110, 'validation': 19110} | {'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'avg_sentence1_len': 103.24, 'avg_sentence2_len': 48.89, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'avg_sentence1_len': 89.57, 'avg_sentence2_len': 41.99, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'avg_sentence1_len': 110.02, 'avg_sentence2_len': 51.63, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'avg_sentence1_len': 119.93, 'avg_sentence2_len': 56.79, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'avg_sentence1_len': 119.05, 'avg_sentence2_len': 56.93, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'avg_sentence1_len': 105.67, 'avg_sentence2_len': 49.8, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 
'avg_sentence1_len': 115.43, 'avg_sentence2_len': 54.68, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'avg_sentence1_len': 121.1, 'avg_sentence2_len': 58.58, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'avg_sentence1_len': 104.63, 'avg_sentence2_len': 50.17, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'avg_sentence1_len': 110.77, 'avg_sentence2_len': 52.45, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210103, 'avg_sentence1_len': 104.44, 'avg_sentence2_len': 49.48, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'avg_sentence1_len': 96.69, 'avg_sentence2_len': 44.54, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'avg_sentence1_len': 103.68, 'avg_sentence2_len': 49.19, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'avg_sentence1_len': 111.31, 'avg_sentence2_len': 52.46, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'avg_sentence1_len': 33.04, 'avg_sentence2_len': 15.73, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'avg_sentence1_len': 103.21, 'avg_sentence2_len': 49.02, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'avg_sentence1_len': 88.32, 
'avg_sentence2_len': 41.61, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'avg_sentence1_len': 109.2, 'avg_sentence2_len': 51.97, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'avg_sentence1_len': 119.81, 'avg_sentence2_len': 57.37, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'avg_sentence1_len': 119.88, 'avg_sentence2_len': 56.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 'avg_sentence1_len': 105.72, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'avg_sentence1_len': 115.17, 'avg_sentence2_len': 55.12, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'avg_sentence1_len': 121.76, 'avg_sentence2_len': 59.09, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'avg_sentence1_len': 105.06, 'avg_sentence2_len': 50.44, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'avg_sentence1_len': 109.75, 'avg_sentence2_len': 52.27, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'avg_sentence1_len': 104.32, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'avg_sentence1_len': 97.28, 'avg_sentence2_len': 43.84, 'unique_labels': 2, 'labels': {'0': {'count': 683}, 
'1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'avg_sentence1_len': 102.97, 'avg_sentence2_len': 49.64, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'avg_sentence1_len': 112.26, 'avg_sentence2_len': 52.43, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'avg_sentence1_len': 33.41, 'avg_sentence2_len': 15.85, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} | +| [XNLI](https://aclanthology.org/D18-1269/) (Conneau et al., 2018) | ['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | {'test': 19110, 'validation': 19110} | {'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'min_sentence1_length': 3, 'avg_sentence1_length': 103.24, 'max_sentence1_length': 401, 'unique_sentence1': 15328, 'min_sentence2_length': 2, 'avg_sentence2_length': 48.89, 'max_sentence2_length': 187, 'unique_sentence2': 19104, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'min_sentence1_length': 11, 'avg_sentence1_length': 89.57, 'max_sentence1_length': 242, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 41.99, 'max_sentence2_length': 115, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'min_sentence1_length': 14, 'avg_sentence1_length': 110.02, 'max_sentence1_length': 303, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 51.63, 'max_sentence2_length': 150, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': 
{'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'min_sentence1_length': 3, 'avg_sentence1_length': 119.93, 'max_sentence1_length': 301, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 56.79, 'max_sentence2_length': 187, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'min_sentence1_length': 13, 'avg_sentence1_length': 119.05, 'max_sentence1_length': 344, 'unique_sentence1': 1095, 'min_sentence2_length': 13, 'avg_sentence2_length': 56.93, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'min_sentence1_length': 19, 'avg_sentence1_length': 105.67, 'max_sentence1_length': 268, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 49.8, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 'min_sentence1_length': 11, 'avg_sentence1_length': 115.43, 'max_sentence1_length': 385, 'unique_sentence1': 1094, 'min_sentence2_length': 8, 'avg_sentence2_length': 54.68, 'max_sentence2_length': 163, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'min_sentence1_length': 9, 'avg_sentence1_length': 121.1, 'max_sentence1_length': 327, 'unique_sentence1': 1095, 'min_sentence2_length': 10, 'avg_sentence2_length': 58.58, 'max_sentence2_length': 169, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'min_sentence1_length': 16, 'avg_sentence1_length': 104.63, 'max_sentence1_length': 401, 'unique_sentence1': 
1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 50.17, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'min_sentence1_length': 11, 'avg_sentence1_length': 110.77, 'max_sentence1_length': 306, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.45, 'max_sentence2_length': 167, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210103, 'min_sentence1_length': 10, 'avg_sentence1_length': 104.44, 'max_sentence1_length': 266, 'unique_sentence1': 1094, 'min_sentence2_length': 2, 'avg_sentence2_length': 49.48, 'max_sentence2_length': 146, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'min_sentence1_length': 12, 'avg_sentence1_length': 96.69, 'max_sentence1_length': 262, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 44.54, 'max_sentence2_length': 129, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'min_sentence1_length': 15, 'avg_sentence1_length': 103.68, 'max_sentence1_length': 255, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 49.19, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'min_sentence1_length': 14, 'avg_sentence1_length': 111.31, 'max_sentence1_length': 265, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.46, 'max_sentence2_length': 143, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 
682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'min_sentence1_length': 4, 'avg_sentence1_length': 33.04, 'max_sentence1_length': 112, 'unique_sentence1': 1095, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.73, 'max_sentence2_length': 59, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'min_sentence1_length': 5, 'avg_sentence1_length': 103.21, 'max_sentence1_length': 323, 'unique_sentence1': 11171, 'min_sentence2_length': 3, 'avg_sentence2_length': 49.02, 'max_sentence2_length': 172, 'unique_sentence2': 19101, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'min_sentence1_length': 13, 'avg_sentence1_length': 88.32, 'max_sentence1_length': 214, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 41.61, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'min_sentence1_length': 16, 'avg_sentence1_length': 109.2, 'max_sentence1_length': 316, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 51.97, 'max_sentence2_length': 151, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'min_sentence1_length': 20, 'avg_sentence1_length': 119.81, 'max_sentence1_length': 298, 'unique_sentence1': 798, 'min_sentence2_length': 12, 'avg_sentence2_length': 57.37, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'min_sentence1_length': 16, 'avg_sentence1_length': 119.88, 
'max_sentence1_length': 302, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 56.88, 'max_sentence2_length': 171, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 'min_sentence1_length': 20, 'avg_sentence1_length': 105.72, 'max_sentence1_length': 271, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'min_sentence1_length': 14, 'avg_sentence1_length': 115.17, 'max_sentence1_length': 265, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 55.12, 'max_sentence2_length': 148, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'min_sentence1_length': 19, 'avg_sentence1_length': 121.76, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 11, 'avg_sentence2_length': 59.09, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'min_sentence1_length': 18, 'avg_sentence1_length': 105.06, 'max_sentence1_length': 277, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 50.44, 'max_sentence2_length': 152, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'min_sentence1_length': 15, 'avg_sentence1_length': 109.75, 'max_sentence1_length': 310, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.27, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 
'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'min_sentence1_length': 13, 'avg_sentence1_length': 104.32, 'max_sentence1_length': 264, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 153, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'min_sentence1_length': 7, 'avg_sentence1_length': 97.28, 'max_sentence1_length': 255, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 43.84, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'min_sentence1_length': 15, 'avg_sentence1_length': 102.97, 'max_sentence1_length': 269, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 49.64, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'min_sentence1_length': 18, 'avg_sentence1_length': 112.26, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.43, 'max_sentence2_length': 159, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'min_sentence1_length': 5, 'avg_sentence1_length': 33.41, 'max_sentence1_length': 135, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.85, 'max_sentence2_length': 66, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} | | [XNLIV2](https://arxiv.org/pdf/2301.06527) (Upadhyay et al., 2023) | ['asm', 'ben', 'bho', 'ell', 'guj', 'kan', 'mar', 'ory', 'pan', 
'rus', 'san', 'tam', 'tur'] | PairClassification | s2s | [Non-fiction, Fiction, Government, Written] | None | None | | [XPQARetrieval](https://arxiv.org/abs/2305.09249) (Shen et al., 2023) | ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'pol', 'por', 'spa', 'tam'] | Retrieval | s2p | [Reviews, Written] | None | None | | [XQuADRetrieval](https://huggingface.co/datasets/xquad) (Mikel Artetxe, 2019) | ['arb', 'deu', 'ell', 'eng', 'hin', 'ron', 'rus', 'spa', 'tha', 'tur', 'vie', 'zho'] | Retrieval | s2p | [Web, Written] | None | None | @@ -594,8 +594,8 @@ The following tables give you an overview of the tasks in MTEB. | [YelpReviewFullClassification](https://arxiv.org/abs/1509.01626) (Zhang et al., 2015) | ['eng'] | Classification | s2s | [Reviews, Written] | None | None | | [YueOpenriceReviewClassification](https://github.com/Christainx/Dataset_Cantonese_Openrice) (Xiang et al., 2019) | ['yue'] | Classification | s2s | [Reviews, Spoken] | None | None | | [indonli](https://link.springer.com/chapter/10.1007/978-3-030-41505-1_39) | ['ind'] | PairClassification | s2s | [Encyclopaedic, Web, News, Written] | None | None | -| [mFollowIRCrossLingualInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['eng', 'fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'average_document_length': 2331.08, 'average_query_length': 81.88, 'average_instruction_length': 389.95, 'average_changed_instruction_length': 450.55, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'average_document_length': 3145.5, 'average_query_length': 80.08, 'average_instruction_length': 396.88, 'average_changed_instruction_length': 463.18, 'average_relevant_docs_per_query': 
10.85, 'average_top_ranked_per_query': 1000.0}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'average_document_length': 2784.08, 'average_query_length': 81.88, 'average_instruction_length': 371.12, 'average_changed_instruction_length': 431.8, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'average_document_length': 1082.05, 'average_query_length': 83.56, 'average_instruction_length': 401.02, 'average_changed_instruction_length': 456.26, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} | -| [mFollowIRInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'average_document_length': 2331.08, 'average_query_length': 57.11, 'average_instruction_length': 281.07, 'average_changed_instruction_length': 326.94, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'average_document_length': 3145.5, 'average_query_length': 72.65, 'average_instruction_length': 358.93, 'average_changed_instruction_length': 415.32, 'average_relevant_docs_per_query': 10.85, 'average_top_ranked_per_query': 1000.0}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109523683, 'average_document_length': 2784.08, 'average_query_length': 77.5, 'average_instruction_length': 387.0, 'average_changed_instruction_length': 458.0, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 
'num_queries': 43, 'number_of_characters': 44504935, 'average_document_length': 1082.05, 'average_query_length': 23.7, 'average_instruction_length': 110.09, 'average_changed_instruction_length': 122.81, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} | +| [mFollowIRCrossLingualInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['eng', 'fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 75, 'min_instruction_length': 93, 'average_instruction_length': 389.95, 'max_instruction_length': 887, 'unique_instructions': 75, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 450.55, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 80.08, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 150, 'average_instruction_length': 396.88, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 205, 'average_changed_instruction_length': 463.18, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 40, 
'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 40, 'min_instruction_length': 93, 'average_instruction_length': 371.12, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 431.8, 'max_changed_instruction_length': 957, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 32, 'average_query_length': 83.56, 'max_query_length': 159, 'unique_queries': 43, 'min_instruction_length': 157, 'average_instruction_length': 401.02, 'max_instruction_length': 731, 'unique_instructions': 43, 'min_changed_instruction_length': 209, 'average_changed_instruction_length': 456.26, 'max_changed_instruction_length': 822, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | +| 
[mFollowIRInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 10, 'average_query_length': 57.11, 'max_query_length': 136, 'unique_queries': 123, 'min_instruction_length': 37, 'average_instruction_length': 281.07, 'max_instruction_length': 1009, 'unique_instructions': 123, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 326.94, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 72.65, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 121, 'average_instruction_length': 358.93, 'max_instruction_length': 759, 'unique_instructions': 40, 'min_changed_instruction_length': 163, 'average_changed_instruction_length': 415.32, 'max_changed_instruction_length': 842, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 
'number_of_characters': 109523683, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 26, 'average_query_length': 77.5, 'max_query_length': 136, 'unique_queries': 40, 'min_instruction_length': 78, 'average_instruction_length': 387.0, 'max_instruction_length': 1009, 'unique_instructions': 40, 'min_changed_instruction_length': 187, 'average_changed_instruction_length': 458.0, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44504935, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 10, 'average_query_length': 23.7, 'max_query_length': 44, 'unique_queries': 43, 'min_instruction_length': 37, 'average_instruction_length': 110.09, 'max_instruction_length': 209, 'unique_instructions': 43, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 122.81, 'max_changed_instruction_length': 229, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | From 78c0e4eb2a3d7e3bff9b27bca2488817a7508279 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 14 Nov 2024 11:05:39 +0000 Subject: [PATCH 04/76] 1.19.5 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 
460fc8aed4..223e6f9673 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.4" +version = "1.19.5" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 4e86ceab8f11d5cacf38e5f959f846c962105e34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Thu, 14 Nov 2024 13:32:21 +0100 Subject: [PATCH 05/76] Fix: Made data parsing in the leaderboard figure more robust (#1450) Bugfixes with data parsing in main figure --- mteb/leaderboard/figures.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mteb/leaderboard/figures.py b/mteb/leaderboard/figures.py index 7a354f7c82..373bcd00c6 100644 --- a/mteb/leaderboard/figures.py +++ b/mteb/leaderboard/figures.py @@ -14,6 +14,10 @@ def parse_n_params(text: str) -> int: def parse_model_name(name: str) -> str: + if name is None: + return "" + if "]" not in name: + return name name, _ = name.split("]") return name[1:] @@ -38,8 +42,8 @@ def performance_size_plot(df: pd.DataFrame) -> go.Figure: df["Number of Parameters"] = df["Number of Parameters"].map(parse_n_params) df["Model"] = df["Model"].map(parse_model_name) df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "") - df["Embedding Dimensions"] = df["Embedding Dimensions"].map(int) - df["Max Tokens"] = df["Max Tokens"].map(int) + df["Embedding Dimensions"] = df["Embedding Dimensions"].map(parse_float) + df["Max Tokens"] = df["Max Tokens"].map(parse_float) df["Log(Tokens)"] = np.log10(df["Max Tokens"]) df["Mean (Task)"] = df["Mean (Task)"].map(parse_float) df = df.dropna(subset=["Mean (Task)", "Number of Parameters"]) From 039d01088f457297a3a1929ff713cc3d55050453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Thu, 14 Nov 2024 14:19:24 +0100 Subject: [PATCH 06/76] Fixed task loading (#1451) * Fixed task result loading from disk * Fixed task result loading from disk --- 
mteb/load_results/task_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index 202ed9b5f5..d619e05a61 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -290,7 +290,7 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: f"Error loading TaskResult from disk. You can try to load historic data by setting `load_historic_data=True`. Error: {e}" ) - if data["mteb_version"] is None: + if ("mteb_version" in data) and (data["mteb_version"] is None): data.pop("mteb_version") pre_1_11_load = ( From feb1ab7652102696a4aa20a03dc98a7240274a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Thu, 14 Nov 2024 15:11:28 +0100 Subject: [PATCH 07/76] fix: publish (#1452) From 3397633ba976ad3c471c3de20a6fb3bc0c28de7c Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 14 Nov 2024 14:16:59 +0000 Subject: [PATCH 08/76] 1.19.6 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 223e6f9673..fecb2d8516 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.5" +version = "1.19.6" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 14d7523850edae97cda2a7264f357da29e0ac867 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 15 Nov 2024 01:33:30 +0500 Subject: [PATCH 09/76] fix: Fix load external results with `None` mteb_version (#1453) * fix * lint --- mteb/load_results/task_results.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index d619e05a61..71943f8771 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ 
-290,15 +290,10 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: f"Error loading TaskResult from disk. You can try to load historic data by setting `load_historic_data=True`. Error: {e}" ) - if ("mteb_version" in data) and (data["mteb_version"] is None): - data.pop("mteb_version") - pre_1_11_load = ( - ( - "mteb_version" in data - and Version(data["mteb_version"]) < Version("1.11.0") - ) - or "mteb_version" not in data + "mteb_version" in data + and data["mteb_version"] is not None + and Version(data["mteb_version"]) < Version("1.11.0") ) # assume it is before 1.11.0 if the version is not present try: obj = cls.model_validate(data) @@ -310,9 +305,11 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: ) obj = cls._convert_from_before_v1_11_0(data) - pre_v_12_48 = "mteb_version" in data and Version( - data["mteb_version"] - ) < Version("1.12.48") + pre_v_12_48 = ( + "mteb_version" in data + and data["mteb_version"] is not None + and Version(data["mteb_version"]) < Version("1.12.48") + ) if pre_v_12_48: cls._fix_pair_classification_scores(obj) From 68eb498ad02d4e08aeb275835d07e368f79cb500 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 14 Nov 2024 20:36:19 +0000 Subject: [PATCH 10/76] 1.19.7 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fecb2d8516..297d9dbe3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.6" +version = "1.19.7" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 58c459bcd3e1ee772624f723e86efb86e40db6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Fri, 15 Nov 2024 12:55:48 +0100 Subject: [PATCH 11/76] WIP: Polishing up leaderboard UI (#1461) * fix: Removed column wrapping on the table, so that it 
remains readable * Added disclaimer to figure * fix: Added links to task info table, switched out license with metric --- mteb/leaderboard/app.py | 15 +++++++++++---- mteb/leaderboard/table.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 9b89d5dd4c..8a5eb961c1 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -60,21 +60,25 @@ def format_list(props: list[str]): return ", ".join(props) -def update_task_info(task_names: str) -> str: +def update_task_info(task_names: str) -> gr.DataFrame: tasks = mteb.get_tasks(tasks=task_names) - df = tasks.to_dataframe() + df = tasks.to_dataframe( + properties=["name", "type", "languages", "domains", "reference", "main_score"] + ) df["languages"] = df["languages"].map(format_list) df["domains"] = df["domains"].map(format_list) + df["name"] = "[" + df["name"] + "](" + df["reference"] + ")" df = df.rename( columns={ "name": "Task Name", "type": "Task Type", "languages": "Languages", "domains": "Domains", - "license": "License", + "main_score": "Metric", } ) - return df + df = df.drop(columns="reference") + return gr.DataFrame(df, datatype=["markdown"] + ["str"] * (len(df.columns) - 1)) all_results = load_results().filter_models() @@ -215,6 +219,9 @@ def update_task_info(task_names: str) -> str: citation = gr.Markdown(update_citation, inputs=[benchmark_select]) with gr.Column(): plot = gr.Plot(performance_size_plot, inputs=[summary_table]) + gr.Markdown( + "*We only display models that have been run on all tasks in the benchmark*" + ) with gr.Tab("Summary"): summary_table.render() with gr.Tab("Performance per task"): diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index d9b830d236..c965a7f682 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -200,7 +200,7 @@ def scores_to_tables( joint_table_style, # column_widths=column_widths, datatype=column_types, - wrap=True, + # wrap=True, ), 
gr.DataFrame(per_task_style), ) From 1b920ac06bb83eba9530c3ddd125e09fb146dc95 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 15 Nov 2024 17:10:37 +0500 Subject: [PATCH 12/76] fix: loading pre 1.11.0 (#1460) * small fix * fix: fix --- mteb/load_results/task_results.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index 71943f8771..b3b1f8cba2 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -291,10 +291,14 @@ def from_disk(cls, path: Path, load_historic_data: bool = True) -> TaskResult: ) pre_1_11_load = ( - "mteb_version" in data - and data["mteb_version"] is not None - and Version(data["mteb_version"]) < Version("1.11.0") + ( + "mteb_version" in data + and data["mteb_version"] is not None + and Version(data["mteb_version"]) < Version("1.11.0") + ) + or "mteb_version" not in data ) # assume it is before 1.11.0 if the version is not present + try: obj = cls.model_validate(data) except Exception as e: From a988fef10cb73e2a35238f14f5c59a6615bbdaeb Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 15 Nov 2024 12:13:44 +0000 Subject: [PATCH 13/76] 1.19.8 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 297d9dbe3a..f680552874 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.7" +version = "1.19.8" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 9b2aecebe00e17b9db02d4fd3182df92222d680d Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 18 Nov 2024 00:27:54 +0200 Subject: [PATCH 14/76] fix: swap touche2020 to maintain compatibility (#1469) swap touche2020 for parity --- mteb/benchmarks/benchmarks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 743a5bde12..4b5c53c2c7 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -106,7 +106,7 @@ def load_results( "StackExchangeClustering.v2", "StackExchangeClusteringP2P.v2", "TRECCOVID", - "Touche2020", + "Touche2020Retrieval.v3", "ToxicConversationsClassification", "TweetSentimentExtractionClassification", "TwentyNewsgroupsClustering.v2", @@ -186,7 +186,7 @@ def load_results( "StackOverflowDupQuestions", "SummEval", "TRECCOVID", - "Touche2020Retrieval.v3", + "Touche2020", "ToxicConversationsClassification", "TweetSentimentExtractionClassification", "TwentyNewsgroupsClustering", From 8bb4a2992337b253b6e95305525709514fea7438 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 17 Nov 2024 22:46:17 +0000 Subject: [PATCH 15/76] 1.19.9 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f680552874..b39544d323 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.8" +version = "1.19.9" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 2fb6fe764585f0cf6555d15ba9b2e18d4adddcf3 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 18 Nov 2024 11:58:00 +0200 Subject: [PATCH 16/76] docs: Add sum per language for task counts (#1468) * add sum per lang * add sort by sum option * make lint --- docs/create_tasks_table.py | 19 +- docs/tasks.md | 2108 ++++++++++++++++++------------------ 2 files changed, 1066 insertions(+), 1061 deletions(-) diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index a6111523a9..13e9830276 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -68,7 +68,7 @@ def create_tasks_table(tasks: list[mteb.AbsTask]) -> str: return table -def 
create_task_lang_table(tasks: list[mteb.AbsTask]) -> str: +def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: table_dict = {} ## Group by language. If it is a multilingual dataset, 1 is added to all languages present. for task in tasks: @@ -82,22 +82,27 @@ def create_task_lang_table(tasks: list[mteb.AbsTask]) -> str: ## Wrangle for polars pl_table_dict = [] for lang, d in table_dict.items(): - d.update({"lang": lang}) + d.update({"0-lang": lang}) # for sorting columns pl_table_dict.append(d) - df = pl.DataFrame(pl_table_dict).sort(by="lang") + df = pl.DataFrame(pl_table_dict).sort(by="0-lang") + df = df.with_columns(sum=pl.sum_horizontal(get_args(TASK_TYPE))) + df = df.select(sorted(df.columns)) + if sort_by_sum: + df = df.sort(by="sum", descending=True) + total = df.sum() task_names_md = " | ".join(sorted(get_args(TASK_TYPE))) - horizontal_line_md = "---|---" * len(sorted(get_args(TASK_TYPE))) + horizontal_line_md = "---|---" * (len(sorted(get_args(TASK_TYPE))) + 1) table = f""" -| Language | {task_names_md} | +| Language | {task_names_md} | Sum | |{horizontal_line_md}| """ for row in df.iter_rows(): - table += f"| {row[-1]} " - for num in row[:-1]: + table += f"| {row[0]} " + for num in row[1:]: table += f"| {num} " table += "|\n" diff --git a/docs/tasks.md b/docs/tasks.md index fd61d8af39..54424029d2 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -607,1060 +607,1060 @@ The following tables give you an overview of the tasks in MTEB.
-| Language | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | -|---|------|------|------|------|------|------|------|------|------|------|---| -| aai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aaz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| abx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ace | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| acq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| acu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| adz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aeb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| afr | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| agd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| agu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ajp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aka | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ake | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| alp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| alq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | -| als | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| aly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ame | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amh | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| amk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| amx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ang | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anp | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| anv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aoi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aoj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aom | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ape | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| apz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ara | 2 | 12 | 0 | 0 | 0 | 2 | 1 | 9 | 2 | 0 | 0 | -| arb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | -| are | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| arq | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| ars | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ary | 1 | 3 | 1 | 0 
| 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| arz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| asm | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| aso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ast | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ata | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| atg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| att | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| auc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| auy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| avt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awa | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| awx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ayr | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azb | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| aze | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| azj | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| azz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bak | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bam | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ban | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbc | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bdd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bel | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bem | 
2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ben | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | -| beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bew | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhb | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhd | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bho | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | -| bhp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| big | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjj | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjn | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bjz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bki | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bkx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| blw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| blz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bns | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| boa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bod | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| boj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | -| bon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bos | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| box | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| boy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bqp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bra | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bre | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| brx | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bug | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bul | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | -| bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bvd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bvr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| byr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| byx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| bzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| caa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| caf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| car | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cat | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cax | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | -| cbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbk | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cbv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ceb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ces | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | -| cgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cha | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| chz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ckb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| cle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| clu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cme | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cmn | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | -| cmo | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cnl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cnt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| code | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | -| cof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| con | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cor | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cot | 1 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cpy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crh | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| crx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| csb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| csy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cta | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cth | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ctp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ctu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cuk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| cym | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| daa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dan | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | -| ded | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| deu | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | -| dgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dgr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| div | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| dji | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| djk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| djr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dob | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| doi | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dtp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dwy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dyu | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dza | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| dzo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ebk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ell | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | -| emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 91 | 13 | 2 | 1 | -| enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| epo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ese | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| esk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| est | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | -| etr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| eus | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ewe | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| faa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fao | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | -| far | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fas | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | -| ffm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fij | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fil | 1 | 2 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | -| fin | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | -| fon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| for | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 14 | 4 | 0 | 1 | -| fry | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| fuv | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gaz | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gbm | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gdn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gdr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| geb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gfk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ghs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gla | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gle | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glg | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| glv | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gmv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gng | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gnw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gom | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| grc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| grn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| gsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guh | 1 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guj | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| gul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gum | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gun | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| guo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gym | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| gyr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hat | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| hau | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| haw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| heb | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| heg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hin | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | -| hix | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hlt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hmn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hne | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hns | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hrv | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| hsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| hun | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | -| hus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| huu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| huv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| hye | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | -| ian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ibo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ido | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ign | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ikk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ikw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ile | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ilo | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| imo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ina | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| inb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ind | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | -| ino | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| iou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ipi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| isl | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| isn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ita | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | -| iws | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ixl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jav | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| jpn | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | -| jvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kab | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kac | 1 | 1 | 1 | 0 | 0 | 0 | 0 
| 1 | 0 | 0 | 0 | -| kam | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kan | 6 | 7 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| kaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kas | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kat | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | -| kaz | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kdc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kdl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kea | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ken | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kfg | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kfy | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| khk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| khm | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| khs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| khz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| kir | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| kiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kiz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kje | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kjs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kkc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| klt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| klv | 1 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmr | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kne | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| knv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kor | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | -| kos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kql | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kqw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| krc | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ksr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ktm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kud | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kvg 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kwj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kyz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kze | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| kzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lao | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lat | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lav | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | -| lbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lcm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| leu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lex | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lfn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lgl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lij | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lim | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lin | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lit | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| llg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lmo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ltg | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ltz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lua | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| lug | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| luo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | -| lvs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mad | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mag | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mai | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mak | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mal | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | -| mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mar | 7 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | -| mau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| max | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| maz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mcr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mdy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| med | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| meq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| met | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| meu | 1 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | -| mey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mgw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mie | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| min | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| miz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mjc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkd | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| mkj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mkn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mks | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mlt | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | -| mmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mmx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mni | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mon | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mos | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mox | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mph | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpj | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mqb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mqj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mri | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| msa | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mui | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mxt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mya | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| myk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| myy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| mzz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| 
nab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| naf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nbl | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ncu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ndg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ndj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nds | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nep | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nfa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ngp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ngu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nho | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nhy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nij | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nld | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | -| nlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nno | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nnq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| noa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | -| nob | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | -| noe | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nor | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | -| not | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| npi | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| npl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nqo | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nso | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| nss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ntu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nuy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nya | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| nys | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| nyu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| obo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| oci | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| okv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| omw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ong | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ons | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ood | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| opm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ori | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| orm | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| orv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ory | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| ote | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| otm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| otn | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | -| otq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ots | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pan | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | -| pao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pap | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pbt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| pcm | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pes | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| pib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| piu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pjt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pls | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| plt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| plu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pma | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| poi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pol | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | -| pon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| por | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | -| poy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ppo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| prf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| prs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ptp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ptu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pus | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| pwg | 1 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| quy | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qve | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qvz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qwh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| qxo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rej | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rom | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ron | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | -| roo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| row | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ruf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rug | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| run | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| rus | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 
| 0 | -| rwo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sah | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| san | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | -| sat | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| scn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sco | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| seh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sgb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| shn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| shp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| sja | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| slk | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | -| sll | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| slv | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | -| smk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| smo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sna | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| snc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snd | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| snn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| snx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sny | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| som | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| soq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sot | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| soy | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | -| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 12 | 4 | 0 | 0 | -| spl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| spy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sqi | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srn | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| srp | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | -| srq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ssw | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| ssx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sun | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| swa | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | -| swe | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | -| swg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| swh | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| swp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| sxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| szl | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| taj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tam | 7 | 7 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | -| taq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tat | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tav | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| taw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tbz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tcz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tdt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tel | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | -| ter | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tet | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tfr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tgk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tgl | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tgo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tha | 4 | 8 | 1 | 0 | 0 | 1 | 1 | 6 | 0 | 0 | 0 | -| tif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tir | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tiy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tke | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tku | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tlf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tmd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| toc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tod | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| 
tof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| toj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ton | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| too | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| top | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpi | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| trc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tsn | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tso | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| tsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ttc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tte | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tum | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tuo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tur | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | -| tvk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| twi | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| txq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| txu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tyv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| tzo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ubr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ubu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| udu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uig | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ukr | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| uli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | -| ulk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| umb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| upv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ura | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urd | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | -| uri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| urw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| usa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| usp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uvl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uzb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| uzn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| vec | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ven | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vie | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | -| viv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| vmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| waj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| war | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| wat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wbp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wiu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wln | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wmt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wmw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wnu | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | -| wol | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| wos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wrk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wrs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wsk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wuu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| wuv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xho | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| xla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xsi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xtd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| xtm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ycn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ydd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yka | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yml | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yor | 4 | 5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | -| yrb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yre | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yue | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yuj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yut | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yuw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| yva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zga | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zho | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | -| zia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ziw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zlm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zsm | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| zsr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| ztq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| zul | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | -| zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| -| Total | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 457 | 85 | 2 | 2 | +| Language | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | Sum | +|---|------|------|------|------|------|------|------|------|------|------|------|---| +| aai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aaz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ace | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| acf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| acq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| acr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| adz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aeb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| aer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| afr | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 10 | +| agd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ajp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 
| +| aka | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ake | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| als | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| aly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ame | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amh | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| amk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ang | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anp | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aom | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ape | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ara | 2 | 12 | 0 | 0 | 0 | 2 | 1 | 9 | 2 | 0 | 0 | 28 | +| arb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 8 | 
+| are | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arq | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | +| ars | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ary | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 7 | +| arz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| asm | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 14 | +| aso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ast | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ata | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| att | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| avt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awa | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| awb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ayr | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| azb | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| aze | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| azg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| azj | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| azz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bak | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bam | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| ban | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bbc | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bdd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bel | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bem | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ben | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | 28 | +| beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bew | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhb | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhd | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bho | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 6 | +| bhp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| big | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjj | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjn | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bjp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bki | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkx 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bns | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bod | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| boj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bos | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| box | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bra | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bre | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| brx | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bsj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bug | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bul | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 13 | +| bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byx | 1 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| car | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cat | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| cav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cax | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbk | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ceb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| cek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ces | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | +| cgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cha | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| chd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjk | 1 | 1 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| cjo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ckb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| cle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| clu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cme | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cmn | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | 44 | +| cmo | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| code | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 37 | +| cof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| con | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cor | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crh | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| crn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cta | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cth | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cui | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cym | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| daa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dan | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | +| ded | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| deu | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | 58 | +| dgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dgr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| div | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dji | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dob | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| doi | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dtp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dyu | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dza | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dzo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ebk | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ell | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | +| emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 91 | 13 | 2 | 1 | 302 | +| enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| epo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ese | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| esk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| est | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | +| etr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eus | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| ewe | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| faa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fao | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | +| far | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fas | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | +| ffm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fij | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| fil | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fin | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | +| fon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| for | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 14 | 4 | 0 | 1 | 56 | +| fry | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fuv | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
gai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaz | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gbm | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| gdn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gdr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| geb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gfk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ghs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gla | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| gle | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glg | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| glv | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gmv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gng | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gom | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| grc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| grn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guj | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | +| gul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gum | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gun | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvf 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gym | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gyr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hat | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| hau | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| haw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| heb | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | +| heg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hin | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | +| hix | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hlt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hne | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| hns | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hrv | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| hsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hun | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 12 | +| hus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hye | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 9 | +| ian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
ibo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 12 | +| ido | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ign | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ile | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ilo | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| imo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ina | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| inb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ind | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | 21 | +| ino | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| iou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ipi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| isl | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| isn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ita | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| iws | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ixl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jav | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 13 | +| jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jpn | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | +| jvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kab | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kac | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kam | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kan | 6 | 7 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 19 | +| kaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kas | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 
+| kat | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 10 | +| kaz | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| kbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kea | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ken | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfg | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfy | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| khm | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| khs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 8 | +| kir | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| kiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kiz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kje | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kjs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| 
kmg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmr | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kne | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kor | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | +| kos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kql | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| krc | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ktm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kud | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kur 
| 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kvg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kze | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lao | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lat | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| lav | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| lbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lcm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| leu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lex | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lfn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lgl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lij | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lim | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lin | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| lit | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| llg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lmo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltg | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| lua | 1 
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lug | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| luo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lvs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mad | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| mag | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mai | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mak | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mal | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 19 | +| mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mar | 7 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | 20 | +| mau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| max | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mdy | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| med | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| met | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mie | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| min | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | +| mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| miz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mjc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkd | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| mkj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mks | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlt | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| mmo | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mmx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mni | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| mon | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mos | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mox | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mph | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mri | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| msa | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| msb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mui | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwp | 1 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mya | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| myk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mzz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| naf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbl | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nds | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nep | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nfa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nho | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhr | 1 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nij | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nld | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | +| nlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nno | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| nnq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| noa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nob | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 19 | +| noe | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nor | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | +| not | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| npi | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| npl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nqo | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nso | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nuy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nya | 3 | 1 | 1 | 0 | 0 
| 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nys | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nyu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| obo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| oci | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| okv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| omw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ong | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ons | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ood | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| opm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ori | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| orm | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| orv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ory | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 15 | +| ote | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ots | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pan | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | +| pao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pap | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pbt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| pcm | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| pes | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| pib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| piu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pjt | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pls | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| plt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| plu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pma | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pol | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | +| pon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| por | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| poy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ppo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| prf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| prs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ptp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ptu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pus | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| pwg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quy | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| qvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qve | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvz | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qwh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rej | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rom | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| ron | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | 18 | +| roo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| row | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ruf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rug | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| run | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| rus | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 | 0 | 52 | +| rwo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sah | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| san | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 10 | +| sat | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| sbe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| scn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sco | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| seh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sey | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| shp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| sja | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slk | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 12 | +| sll | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slv | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| smk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| smo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| sna | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snd | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sny | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| som | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| soq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sot | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| soy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 12 | 4 | 0 | 0 | 42 | +| spl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sqi | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| srd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srm | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srn | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| srp | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | +| srq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssw | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| ssx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sun | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swa | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | +| swe | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | 22 | +| swg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swh | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| swp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| szl | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tam | 7 | 7 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | 21 | +| taq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tat | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| tav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tca | 1 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 1 | +| tcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tcz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tdt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tel | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | 24 | +| ter | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tet | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tfr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| tgl | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tgo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tha | 4 | 8 | 1 | 0 | 0 | 1 | 1 | 6 | 0 | 0 | 0 | 21 | +| tif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tir | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tiy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tke | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tku | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tlf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tmd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tod | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ton | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| too | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| top | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 1 | +| tos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpi | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| tpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| trc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tsn | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tso | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ttc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tte | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| tum | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tuo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tur | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | +| tvk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| twi | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| txq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| txu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tyv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tzo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| udu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uig | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| ukr | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| uli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ulk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| umb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 3 | +| upv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ura | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urd | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 19 | +| uri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uzb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| uzn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| vec | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ven | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| vid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vie | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 18 | +| viv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| waj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| war | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| wat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wln | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 1 | +| wnu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wol | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| wos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wsk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xho | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| xla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xsi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ycn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ydd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| yid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yka | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yml | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yor | 4 | 5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 16 | +| yrb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yre | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 1 | +| yss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yue | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| yuj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yuw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zga | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zho | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | +| zia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ziw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zlm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 1 | +| zsm | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| zsr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ztq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zul | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| Total | None | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 457 | 85 | 2 | 2 |
From fde124a8a0894838aabca90b061191e74c33a82f Mon Sep 17 00:00:00 2001 From: Napuh <55241721+Napuh@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:34:49 +0100 Subject: [PATCH 17/76] fix: pinned datasets to <3.0.0 (#1470) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b39544d323..baa89fb16e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ ] requires-python = ">=3.9" dependencies = [ - "datasets>=2.19.0", + "datasets>=2.19.0,<3.0.0", "numpy>=1.0.0,<3.0.0", "requests>=2.26.0", "scikit_learn>=1.0.2", From 7186e04a5afe7f3ae0898d601487dcb7f0785cf7 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 14:52:45 +0000 Subject: [PATCH 18/76] 1.19.10 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index baa89fb16e..38a797318a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.9" +version = "1.19.10" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 1cc6c9e0fe62ca4e77708b641823fa1a121f048b Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Thu, 21 Nov 2024 13:46:55 -0500 Subject: [PATCH 19/76] feat: add CUREv1 retrieval dataset (#1459) * feat: add CUREv1 dataset --------- Co-authored-by: nadshe Co-authored-by: olivierr42 Co-authored-by: Daniel Buades Marcos * feat: add missing domains to medical tasks * feat: modify benchmark tasks * chore: benchmark naming --------- Co-authored-by: nadshe Co-authored-by: olivierr42 --- mteb/__init__.py | 2 + mteb/benchmarks/benchmarks.py | 24 +++ mteb/tasks/Reranking/zho/CMTEBReranking.py | 2 +- mteb/tasks/Retrieval/__init__.py | 1 + mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py | 2 +- mteb/tasks/Retrieval/eng/SciFactRetrieval.py | 2 +- 
.../tasks/Retrieval/eng/TRECCOVIDRetrieval.py | 2 +- .../Retrieval/multilingual/CUREv1Retrieval.py | 151 ++++++++++++++++++ .../tasks/Retrieval/pol/SciFactPLRetrieval.py | 2 +- .../Retrieval/pol/TRECCOVIDPLRetrieval.py | 2 +- mteb/tasks/Retrieval/zho/CMTEBRetrieval.py | 2 +- 11 files changed, 185 insertions(+), 7 deletions(-) create mode 100644 mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py diff --git a/mteb/__init__.py b/mteb/__init__.py index 1ef561a5f1..6de017b1f1 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -6,6 +6,7 @@ MTEB_ENG_CLASSIC, MTEB_MAIN_RU, MTEB_RETRIEVAL_LAW, + MTEB_RETRIEVAL_MEDICAL, MTEB_RETRIEVAL_WITH_INSTRUCTIONS, CoIR, ) @@ -24,6 +25,7 @@ "MTEB_ENG_CLASSIC", "MTEB_MAIN_RU", "MTEB_RETRIEVAL_LAW", + "MTEB_RETRIEVAL_MEDICAL", "MTEB_RETRIEVAL_WITH_INSTRUCTIONS", "CoIR", "TASKS_REGISTRY", diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 4b5c53c2c7..9aaefda3cb 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -308,6 +308,29 @@ def load_results( citation=None, ) +MTEB_RETRIEVAL_MEDICAL = Benchmark( + name="MTEB(Medical)", + tasks=get_tasks( + tasks=[ + "CUREv1", + "NFCorpus", + "TRECCOVID", + "TRECCOVID-PL", + "SciFact", + "SciFact-PL", + "MedicalQARetrieval", + "PublicHealthQA", + "MedrxivClusteringP2P.v2", + "MedrxivClusteringS2S.v2", + "CmedqaRetrieval", + "CMedQAv2-reranking", + ], + ), + description="A curated set of MTEB tasks designed to evaluate systems in the context of medical information retrieval.", + reference="", + citation=None, +) + MTEB_MINERS_BITEXT_MINING = Benchmark( name="MINERSBitextMining", tasks=get_tasks( @@ -702,6 +725,7 @@ def load_results( "SpartQA", "TempReasonL1", "TRECCOVID", + "CUREv1", "WinoGrande", "BelebeleRetrieval", "MLQARetrieval", diff --git a/mteb/tasks/Reranking/zho/CMTEBReranking.py b/mteb/tasks/Reranking/zho/CMTEBReranking.py index 302f62adf5..7a33f7ae0a 100644 --- a/mteb/tasks/Reranking/zho/CMTEBReranking.py +++ 
b/mteb/tasks/Reranking/zho/CMTEBReranking.py @@ -128,7 +128,7 @@ class CMedQAv2(AbsTaskReranking): main_score="map", date=None, form=None, - domains=None, + domains=["Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index f8a47b08a9..ca41d4354f 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -105,6 +105,7 @@ from .multilingual.BelebeleRetrieval import * from .multilingual.CrossLingualSemanticDiscriminationWMT19 import * from .multilingual.CrossLingualSemanticDiscriminationWMT21 import * +from .multilingual.CUREv1Retrieval import * from .multilingual.IndicQARetrieval import * from .multilingual.MintakaRetrieval import * from .multilingual.MIRACLRetrieval import * diff --git a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py index 7c40b6707b..31f4eb60b1 100644 --- a/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/eng/NFCorpusRetrieval.py @@ -21,7 +21,7 @@ class NFCorpus(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Academic", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py index 05e9a6e541..1dc47d8b66 100644 --- a/mteb/tasks/Retrieval/eng/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/eng/SciFactRetrieval.py @@ -21,7 +21,7 @@ class SciFact(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Academic", "Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py index 6c7b7f01d1..00c96c0d04 100644 --- a/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py +++ 
b/mteb/tasks/Retrieval/eng/TRECCOVIDRetrieval.py @@ -21,7 +21,7 @@ class TRECCOVID(AbsTaskRetrieval): eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Academic", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py new file mode 100644 index 0000000000..6e97786a77 --- /dev/null +++ b/mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from enum import Enum + +from datasets import DatasetDict, load_dataset + +from mteb.abstasks.TaskMetadata import TaskMetadata + +from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from ....abstasks.MultilingualTask import MultilingualTask + +_LANGUAGES = { + "en": ["eng-Latn", "eng-Latn"], + "es": ["spa-Latn", "eng-Latn"], + "fr": ["fra-Latn", "eng-Latn"], +} + + +class CUREv1Splits(str, Enum): + all = "All" + dentistry_and_oral_health = "Dentistry and Oral Health" + dermatology = "Dermatology" + gastroenterology = "Gastroenterology" + genetics = "Genetics" + neuroscience_and_neurology = "Neuroscience and Neurology" + orthopedic_surgery = "Orthopedic Surgery" + otorhinolaryngology = "Otorhinolaryngology" + plastic_surgery = "Plastic Surgery" + psychiatry_and_psychology = "Psychiatry and Psychology" + pulmonology = "Pulmonology" + + @classmethod + def names(cls) -> list[str]: + return sorted(cls._member_names_) + + +class CUREv1Retrieval(MultilingualTask, AbsTaskRetrieval): + metadata = TaskMetadata( + dataset={ + "path": "clinia/CUREv1", + "revision": "3bcf51c91e04d04a8a3329dfbe988b964c5cbe83", + }, + name="CUREv1", + description="Collection of query-passage pairs curated by medical professionals, across 10 disciplines and 3 cross-lingual settings.", + type="Retrieval", + modalities=["text"], + category="s2p", + reference="https://huggingface.co/datasets/clinia/CUREv1", + 
eval_splits=CUREv1Splits.names(), + eval_langs=_LANGUAGES, + main_score="ndcg_at_10", + date=("2024-01-01", "2024-10-31"), + domains=["Medical", "Academic", "Written"], + task_subtypes=[], + license="cc-by-nc-4.0", + annotations_creators="expert-annotated", + dialect=[], + sample_creation="created", + bibtex_citation="", + prompt={ + "query": "Given a question by a medical professional, retrieve relevant passages that best answer the question", + }, + ) + + def _load_corpus(self, split: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name="corpus", + split=split, + cache_dir=cache_dir, + ) + + corpus = { + doc["_id"]: {"title": doc["title"], "text": doc["text"]} for doc in ds + } + + return corpus + + def _load_qrels(self, split: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name="qrels", + split=split, + cache_dir=cache_dir, + ) + + qrels = {} + + for qrel in ds: + query_id = qrel["query-id"] + doc_id = qrel["corpus-id"] + score = int(qrel["score"]) + if query_id not in qrels: + qrels[query_id] = {} + qrels[query_id][doc_id] = score + + return qrels + + def _load_queries(self, split: str, language: str, cache_dir: str | None = None): + ds = load_dataset( + path=self.metadata_dict["dataset"]["path"], + revision=self.metadata_dict["dataset"]["revision"], + name=f"queries-{language}", + split=split, + cache_dir=cache_dir, + ) + + queries = {query["_id"]: query["text"] for query in ds} + + return queries + + def load_data(self, **kwargs): + if self.data_loaded: + return + + eval_splits = kwargs.get("eval_splits", self.metadata.eval_splits) + languages = kwargs.get("eval_langs", self.metadata.eval_langs) + cache_dir = kwargs.get("cache_dir", None) + + # Iterate over splits and languages + corpus = { + language: {split: None for split in eval_splits} 
for language in languages + } + queries = { + language: {split: None for split in eval_splits} for language in languages + } + relevant_docs = { + language: {split: None for split in eval_splits} for language in languages + } + for split in eval_splits: + # Since this is a cross-lingual dataset, the corpus and the relevant documents do not depend on the language + split_corpus = self._load_corpus(split=split, cache_dir=cache_dir) + split_qrels = self._load_qrels(split=split, cache_dir=cache_dir) + + # Queries depend on the language + for language in languages: + corpus[language][split] = split_corpus + relevant_docs[language][split] = split_qrels + + queries[language][split] = self._load_queries( + split=split, language=language, cache_dir=cache_dir + ) + + # Convert into DatasetDict + self.corpus = DatasetDict(corpus) + self.queries = DatasetDict(queries) + self.relevant_docs = DatasetDict(relevant_docs) + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py index 2588b1c288..92d61b42bd 100644 --- a/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/SciFactPLRetrieval.py @@ -22,7 +22,7 @@ class SciFactPL(AbsTaskRetrieval): eval_langs=["pol-Latn"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Academic", "Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, diff --git a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py index 4ba6a9ac00..f9f331191a 100644 --- a/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py +++ b/mteb/tasks/Retrieval/pol/TRECCOVIDPLRetrieval.py @@ -25,7 +25,7 @@ class TRECCOVIDPL(AbsTaskRetrieval): "2019-12-01", "2022-12-31", ), # approximate date of covid pandemic start and end (best guess) - domains=["Academic", "Non-fiction", "Written"], + domains=["Academic", "Medical", "Non-fiction", "Written"], task_subtypes=["Article retrieval"], 
license="not specified", annotations_creators="derived", diff --git a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py index 08674ec8c8..ad26652ccd 100644 --- a/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zho/CMTEBRetrieval.py @@ -236,7 +236,7 @@ class CmedqaRetrieval(AbsTaskRetrieval): eval_langs=["cmn-Hans"], main_score="ndcg_at_10", date=None, - domains=None, + domains=["Medical", "Written"], task_subtypes=None, license=None, annotations_creators=None, From 440871739689a175d0e4a8c538ce428a4e27e350 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 21 Nov 2024 18:49:12 +0000 Subject: [PATCH 20/76] Update tasks table --- docs/tasks.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/tasks.md b/docs/tasks.md index 54424029d2..194f7ba70f 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -63,7 +63,7 @@ The following tables give you an overview of the tasks in MTEB. | [CLSClusteringP2P.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | p2p | [Academic, Written] | None | None | | [CLSClusteringS2S.v2](https://arxiv.org/abs/2209.05034) (Yudong Li, 2022) | ['cmn'] | Clustering | s2s | [Academic, Written] | None | None | | [CMedQAv1-reranking](https://github.com/zhangsheng93/cMedQA) (Zhang et al., 2017) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | -| [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. Zhang, 2018) | ['cmn'] | Reranking | s2s | | None | None | +| [CMedQAv2-reranking](https://github.com/zhangsheng93/cMedQA2) (S. 
Zhang, 2018) | ['cmn'] | Reranking | s2s | [Medical, Written] | None | None | | [COIRCodeSearchNetRetrieval](https://huggingface.co/datasets/code_search_net/) (Husain et al., 2019) | ['go', 'java', 'javascript', 'php', 'python', 'ruby'] | Retrieval | p2p | [Programming, Written] | {'test': 1056326} | {'test': {'number_of_characters': 36843313, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'min_document_length': 54, 'average_document_length': 34.71, 'max_document_length': 334374, 'unique_documents': 1003765, 'min_query_length': 2, 'average_query_length': 38.19, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 14574651, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'min_document_length': 95, 'average_document_length': 49.99, 'max_document_length': 14008, 'unique_documents': 280310, 'min_query_length': 2, 'average_query_length': 37.58, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 2587540, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'min_document_length': 87, 'average_document_length': 37.9, 'max_document_length': 334374, 'unique_documents': 64854, 'min_query_length': 2, 'average_query_length': 39.41, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 3641108, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'min_document_length': 54, 'average_document_length': 17.96, 'max_document_length': 5280, 'unique_documents': 182440, 
'min_query_length': 2, 'average_query_length': 44.92, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 629446, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'min_document_length': 83, 'average_document_length': 20.83, 'max_document_length': 3992, 'unique_documents': 27570, 'min_query_length': 2, 'average_query_length': 43.73, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 6791137, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'min_document_length': 77, 'average_document_length': 35.55, 'max_document_length': 7615, 'unique_documents': 180866, 'min_query_length': 2, 'average_query_length': 33.02, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 8619431, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'min_document_length': 94, 'average_document_length': 30.2, 'max_document_length': 4904, 'unique_documents': 267725, 'min_query_length': 2, 'average_query_length': 38.21, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}} | | [CPUSpeedTask](https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/c8376f967d1294419be1d3eb41217d04cd3a65d3/src/seb/registered_tasks/speed.py#L83-L96) | ['eng'] | Speed | s2s | [Fiction, Written] | None | None | | [CQADupstackAndroidRetrieval](http://nlp.cis.unimelb.edu.au/resources/cqadupstack/) (Hoogeveen et al., 2015) 
| ['eng'] | Retrieval | s2p | | None | None | @@ -119,11 +119,12 @@ The following tables give you an overview of the tasks in MTEB. | [CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CUADVolumeRestrictionLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CUADWarrantyDurationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | +| [CUREv1](https://huggingface.co/datasets/clinia/CUREv1) | ['eng', 'fra', 'spa'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | | [CanadaTaxCourtOutcomesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CataloniaTweetClassification](https://aclanthology.org/2020.lrec-1.171/) | ['cat', 'spa'] | Classification | s2s | [Social, Government, Written] | None | None | | [ClimateFEVER](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | -| [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | | None | None | +| [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | [Medical, Written] | None | None | | [Cmnli](https://huggingface.co/datasets/clue/viewer/cmnli) | ['cmn'] | PairClassification | s2s | | None | None | | [CodeEditSearchRetrieval](https://huggingface.co/datasets/cassanof/CodeEditSearch/viewer) (Niklas 
Muennighoff, 2023) | ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] | Retrieval | p2p | [Programming, Written] | {'train': 26000} | {'train': {'number_of_characters': 935841, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'min_document_length': 18, 'average_document_length': 70.99, 'max_document_length': 2532, 'unique_documents': 13000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 13000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70519, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 69.52, 'max_document_length': 1811, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 57880, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 56.88, 'max_document_length': 601, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'typescript': {'number_of_characters': 61092, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 60.09, 'max_document_length': 659, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 
'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 71797, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 70.8, 'max_document_length': 1529, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 67900, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 66.9, 'max_document_length': 751, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 63984, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 23, 'average_document_length': 62.98, 'max_document_length': 807, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 62927, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 61.93, 'max_document_length': 766, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c': {'number_of_characters': 98588, 'num_samples': 2000, 
'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 97.59, 'max_document_length': 1672, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c++': {'number_of_characters': 115480, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 114.48, 'max_document_length': 1856, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'rust': {'number_of_characters': 68503, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 67.5, 'max_document_length': 2532, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'swift': {'number_of_characters': 58279, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 57.28, 'max_document_length': 727, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'scala': {'number_of_characters': 65833, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 64.83, 'max_document_length': 685, 'unique_documents': 1000, 
'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'shell': {'number_of_characters': 73059, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 72.06, 'max_document_length': 813, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}} | | [CodeFeedbackMT](https://arxiv.org/abs/2402.14658) (Tianyu Zheng, 2024) | ['eng'] | Retrieval | p2p | [Programming, Written] | {'test': 79660} | {'test': {'number_of_characters': 156266302, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'min_document_length': 127, 'average_document_length': 885.13, 'max_document_length': 32432, 'unique_documents': 66383, 'min_query_length': 2, 'average_query_length': 7344.18, 'max_query_length': 9403, 'unique_queries': 13277, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13277}} | @@ -347,7 +348,7 @@ The following tables give you an overview of the tasks in MTEB. | [MultilingualSentiment](https://github.com/tyqiangz/multilingual-sentiment-datasets) | ['cmn'] | Classification | s2s | | None | None | | [MultilingualSentimentClassification](https://huggingface.co/datasets/mteb/multilingual-sentiment-classification) | ['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] | Classification | s2s | [Reviews, Written] | None | None | | [MyanmarNews](https://huggingface.co/datasets/myanmar_news) (A. 
H. Khine, 2017) | ['mya'] | Classification | p2p | [News, Written] | None | None | -| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | +| [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Boteva et al., 2016) | ['eng'] | Retrieval | s2p | [Medical, Academic, Written] | {'test': 3956} | {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}} | | [NFCorpus-PL](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | | [NLPJournalAbsIntroRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | | [NLPJournalTitleAbsRetrieval](https://github.com/sbintuitions/JMTEB) | ['jpn'] | Retrieval | s2s | [Academic, Written] | None | None | @@ -482,8 +483,8 @@ The following tables give you an overview of the tasks in MTEB. 
| [SanskritShlokasClassification](https://github.com/goru001/nlp-for-sanskrit) | ['san'] | Classification | s2s | [Religious, Written] | None | None | | [ScalaClassification](https://aclanthology.org/2023.nodalida-1.20/) | ['dan', 'nno', 'nob', 'swe'] | Classification | s2s | [Fiction, News, Non-fiction, Blog, Spoken, Web, Written] | None | None | | [SciDocsRR](https://allenai.org/data/scidocs) | ['eng'] | Reranking | s2s | [Academic, Non-fiction, Written] | None | None | -| [SciFact](https://github.com/allenai/scifact) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | | None | None | -| [SciFact-PL](https://github.com/allenai/scifact) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | +| [SciFact](https://github.com/allenai/scifact) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Medical, Written] | None | None | +| [SciFact-PL](https://github.com/allenai/scifact) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Medical, Written] | None | None | | [SemRel24STS](https://huggingface.co/datasets/SemRel/SemRel2024) (Nedjma Ousidhoum, 2024) | ['afr', 'amh', 'arb', 'arq', 'ary', 'eng', 'hau', 'hin', 'ind', 'kin', 'mar', 'tel'] | STS | s2s | [Spoken, Written] | None | None | | [SensitiveTopicsClassification](https://aclanthology.org/2021.bsnlp-1.4) | ['rus'] | MultilabelClassification | s2s | [Web, Social, Written] | None | None | | [SentimentAnalysisHindi](https://huggingface.co/datasets/OdiaGenAI/sentiment_analysis_hindi) (Shantipriya Parida, 2023) | ['hin'] | Classification | s2s | [Reviews, Written] | None | None | @@ -523,8 +524,8 @@ The following tables give you an overview of the tasks in MTEB. 
| [T2Retrieval](https://arxiv.org/abs/2304.03679) (Xiaohui Xie, 2023) | ['cmn'] | Retrieval | s2p | | None | None | | [TERRa](https://arxiv.org/pdf/2010.15925) (Shavrina et al., 2020) | ['rus'] | PairClassification | s2s | [News, Web, Written] | None | None | | [TNews](https://www.cluebenchmarks.com/introduce.html) | ['cmn'] | Classification | s2s | | None | None | -| [TRECCOVID](https://ir.nist.gov/covidSubmit/index.html) (Kirk Roberts, 2021) | ['eng'] | Retrieval | s2p | | None | None | -| [TRECCOVID-PL](https://ir.nist.gov/covidSubmit/index.html) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Non-fiction, Written] | None | None | +| [TRECCOVID](https://ir.nist.gov/covidSubmit/index.html) (Kirk Roberts, 2021) | ['eng'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | +| [TRECCOVID-PL](https://ir.nist.gov/covidSubmit/index.html) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | [Academic, Medical, Non-fiction, Written] | None | None | | [TV2Nordretrieval](https://huggingface.co/datasets/alexandrainst/nordjylland-news-summarization) | ['dan'] | Retrieval | p2p | [News, Non-fiction, Written] | None | None | | [TamilNewsClassification](https://github.com/vanangamudi/tamil-news-classification) (Anoop Kunchukuttan, 2020) | ['tam'] | Classification | s2s | [News, Written] | None | None | | [Tatoeba](https://github.com/facebookresearch/LASER/tree/main/data/tatoeba/v1) (Tatoeba community, 2021) | ['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 
'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] | BitextMining | s2s | [Written] | None | None | @@ -888,7 +889,7 @@ The following tables give you an overview of the tasks in MTEB. | ell | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | | emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 91 | 13 | 2 | 1 | 302 | +| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 92 | 13 | 2 | 1 | 303 | | enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | epo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -909,7 +910,7 @@ The following tables give you an overview of the tasks in MTEB. | fin | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | | fon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | for | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 14 | 4 | 0 | 1 | 56 | +| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | | fry | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1434,7 +1435,7 @@ The following tables give you an overview of the tasks in MTEB. 
| soq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sot | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | | soy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 12 | 4 | 0 | 0 | 42 | +| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | | spl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1660,7 +1661,7 @@ The following tables give you an overview of the tasks in MTEB. | zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zul | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | | zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| Total | None | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 457 | 85 | 2 | 2 | +| Total | None | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 460 | 85 | 2 | 2 | From 3ff38ec043f6ebc4e434c083cf96dc823439f8a3 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 19:04:26 +0000 Subject: [PATCH 21/76] 1.20.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 38a797318a..fec35b7e08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.19.10" +version = "1.20.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 917ad7f23704edc974c407efda20edc71375041d Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Tue, 26 Nov 2024 21:02:31 +0500 Subject: [PATCH 22/76] fix: check if `model` attr of model exists (#1499) * check if model attr of model exists * lint * Fix retrieval evaluator --- mteb/evaluation/evaluators/RetrievalEvaluator.py | 8 +++++--- mteb/models/sentence_transformer_wrapper.py | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py 
b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 4b2596c4d5..20a29b3ad5 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -268,7 +268,7 @@ def search_cross_encoder( for qid in queries.keys(): if self.previous_results is None: # try to use all of them - logging.logging( + logging.info( f"previous_results is None. Using all the documents to rerank: {len(corpus)}" ) q_results = {doc_id: 0.0 for doc_id in corpus.keys()} @@ -318,7 +318,9 @@ def search_cross_encoder( len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair) ) - if isinstance(self.model.model, CrossEncoder): + if hasattr(self.model, "model") and isinstance( + self.model.model, CrossEncoder + ): # can't take instructions, so add them here queries_in_pair = [ f"{q} {i}".strip() @@ -428,7 +430,7 @@ def encode( def is_cross_encoder_compatible(model) -> bool: - op = getattr(model.model, "predict", None) + op = getattr(model, "predict", None) return callable(op) diff --git a/mteb/models/sentence_transformer_wrapper.py b/mteb/models/sentence_transformer_wrapper.py index 5cc824fa82..13d39e4031 100644 --- a/mteb/models/sentence_transformer_wrapper.py +++ b/mteb/models/sentence_transformer_wrapper.py @@ -53,6 +53,9 @@ def __init__( self.model.prompts = model_prompts self.model_prompts = self.validate_task_to_prompt_name(model_prompts) + if isinstance(self.model, CrossEncoder): + self.predict = self._predict + def encode( self, sentences: Sequence[str], @@ -106,7 +109,7 @@ def encode( embeddings = embeddings.cpu().detach().float().numpy() return embeddings - def predict( + def _predict( self, sentences: Sequence[str], **kwargs: Any, From cde720e34aa3ad8e5aa768fe1d7842cc73801aaa Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 26 Nov 2024 16:20:15 +0000 Subject: [PATCH 23/76] 1.20.1 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pyproject.toml b/pyproject.toml index fec35b7e08..6d36d2b7ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.0" +version = "1.20.1" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 0affa31c23727889f56f4bb27da9154ce13ed67a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Wed, 27 Nov 2024 15:04:38 +0100 Subject: [PATCH 24/76] fix: Leaderboard demo data loading (#1507) * Made get_scores error tolerant * Added join_revisions, made get_scores failsafe * Fetching metadata fixed fr HF models * Added failsafe metadata fetching to leaderboard code * Added revision joining to leaderboard app * fix * Only show models that have metadata, when filter_models is called * Ran linting --- mteb/leaderboard/app.py | 2 +- mteb/leaderboard/table.py | 10 +- mteb/load_results/benchmark_results.py | 175 ++++++++++++++++++------- mteb/models/overview.py | 41 +++++- 4 files changed, 169 insertions(+), 59 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 8a5eb961c1..913b23c829 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -81,7 +81,7 @@ def update_task_info(task_names: str) -> gr.DataFrame: return gr.DataFrame(df, datatype=["markdown"] + ["str"] * (len(df.columns) - 1)) -all_results = load_results().filter_models() +all_results = load_results().join_revisions() # Model sizes in million parameters min_model_size, max_model_size = 0, 10_000 diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index c965a7f682..734de2c238 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -98,6 +98,13 @@ def get_means_per_types(df: pd.DataFrame) -> pd.DataFrame: return pd.DataFrame.from_records(records) +def failsafe_get_model_meta(model_name): + try: + return get_model_meta(model_name) + except Exception as e: + return None + + def scores_to_tables( scores_long: 
list[dict], search_query: str | None = None ) -> tuple[gr.DataFrame, gr.DataFrame]: @@ -132,7 +139,8 @@ def scores_to_tables( joint_table["borda_rank"] = get_borda_rank(per_task) joint_table = joint_table.reset_index() joint_table = joint_table.drop(columns=["model_revision"]) - model_metas = joint_table["model_name"].map(get_model_meta) + model_metas = joint_table["model_name"].map(failsafe_get_model_meta) + joint_table = joint_table[model_metas.notna()] joint_table["model_link"] = model_metas.map(lambda m: m.reference) joint_table.insert( 1, diff --git a/mteb/load_results/benchmark_results.py b/mteb/load_results/benchmark_results.py index bf3fa5fe92..756024a4e6 100644 --- a/mteb/load_results/benchmark_results.py +++ b/mteb/load_results/benchmark_results.py @@ -1,12 +1,15 @@ from __future__ import annotations import json +import warnings from collections import defaultdict from collections.abc import Iterable from pathlib import Path -from typing import Any, Callable, Literal +from typing import Any, Callable, Literal, Optional import numpy as np +import pandas as pd +from packaging.version import InvalidVersion, Version from pydantic import BaseModel, ConfigDict from mteb.abstasks.AbsTask import AbsTask, ScoresDict @@ -89,36 +92,45 @@ def get_scores( format: Literal["wide", "long"] = "wide", ) -> dict | list: if format == "wide": - scores = { - res.task_name: res.get_score( - splits=splits, - languages=languages, - scripts=scripts, - getter=getter, - aggregation=aggregation, - ) - for res in self.task_results - } - return scores - if format == "long": - entries = [] - for task_res in self.task_results: - entry = dict( # noqa - model_name=self.model_name, - model_revision=self.model_revision, - task_name=task_res.task_name, - score=task_res.get_score( + scores = {} + for res in self.task_results: + try: + scores[res.task_name] = res.get_score( splits=splits, languages=languages, + scripts=scripts, getter=getter, aggregation=aggregation, - ), - 
mteb_version=task_res.mteb_version, - dataset_revision=task_res.dataset_revision, - evaluation_time=task_res.evaluation_time, - kg_co2_emissions=task_res.kg_co2_emissions, - ) - entries.append(entry) + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {res.task_name} due to {e}." + ) + return scores + if format == "long": + entries = [] + for task_res in self.task_results: + try: + entry = dict( # noqa + model_name=self.model_name, + model_revision=self.model_revision, + task_name=task_res.task_name, + score=task_res.get_score( + splits=splits, + languages=languages, + getter=getter, + aggregation=aggregation, + ), + mteb_version=task_res.mteb_version, + dataset_revision=task_res.dataset_revision, + evaluation_time=task_res.evaluation_time, + kg_co2_emissions=task_res.kg_co2_emissions, + ) + entries.append(entry) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {task_res.task_name} due to {e}." + ) return entries def __iter__(self): @@ -198,6 +210,8 @@ def filter_models( n_parameters_range: tuple[int | None, int | None] = (None, None), use_instructions: bool | None = None, ) -> BenchmarkResults: + # if model_names is None: + # model_names = [model_res.model_name for model_res in self] model_metas = get_model_metas( model_names=model_names, languages=languages, @@ -206,13 +220,64 @@ def filter_models( n_parameters_range=n_parameters_range, use_instructions=use_instructions, ) - model_revision_pairs = {(meta.name, meta.revision) for meta in model_metas} + models = {meta.name for meta in model_metas} + # model_revision_pairs = {(meta.name, meta.revision) for meta in model_metas} new_model_results = [] for model_res in self: - if (model_res.model_name, model_res.model_revision) in model_revision_pairs: + if model_res.model_name in models: new_model_results.append(model_res) return type(self).model_construct(model_results=new_model_results) + def join_revisions(self): + def parse_version(version_str: str) -> Optional[Version]: 
+ try: + return Version(version_str) + except (InvalidVersion, TypeError): + return None + + def keep_best(group: pd.DataFrame) -> pd.DataFrame: + is_main_revision = group["revision"] == group["main_revision"] + if is_main_revision.sum() == 1: + return group[is_main_revision] + if group["mteb_version"].notna().any(): + group = group.dropna(subset=["mteb_version"]) + group = group.sort_values("mteb_version", ascending=False) + return group.head(n=1) + return group.head(n=1) + + records = [] + for model_result in self: + for task_result in model_result: + records.append( + dict( + model=model_result.model_name, + revision=model_result.model_revision, + task_name=task_result.task_name, + mteb_version=task_result.mteb_version, + task_result=task_result, + ) + ) + task_df = pd.DataFrame.from_records(records) + model_to_main_revision = { + meta.name: meta.revision for meta in get_model_metas() + } + task_df["main_revision"] = task_df["model"].map(model_to_main_revision) + task_df["mteb_version"] = task_df["mteb_version"].map(parse_version) + task_df = ( + task_df.groupby(["model", "task_name"]) + .apply(keep_best) + .reset_index(drop=True) + ) + model_results = [] + for (model, model_revision), group in task_df.groupby(["model", "revision"]): + model_result = ModelResult.model_construct( + model_name=model, + model_revision=model_revision, + task_results=list(group["task_result"]), + ) + model_results.append(model_result) + return BenchmarkResults.model_construct(model_results=model_results) + def get_scores( self, splits: list[Split] | None = None, @@ -225,33 +290,43 @@ def get_scores( entries = [] if format == "wide": for model_res in self: - model_scores = model_res.get_scores( - splits=splits, - languages=languages, - scripts=scripts, - getter=getter, - aggregation=aggregation, - format="wide", - ) - entries.append( - { - "model": model_res.model_name, - "revision": model_res.model_revision, - **model_scores, - } - ) - if format == "long": - for model_res in self: - 
entries.extend( - model_res.get_scores( + try: + model_scores = model_res.get_scores( splits=splits, languages=languages, scripts=scripts, getter=getter, aggregation=aggregation, - format="long", + format="wide", + ) + entries.append( + { + "model": model_res.model_name, + "revision": model_res.model_revision, + **model_scores, + } + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {model_res.model_name}({model_res.model_revision}), due to: {e}" + ) + if format == "long": + for model_res in self: + try: + entries.extend( + model_res.get_scores( + splits=splits, + languages=languages, + scripts=scripts, + getter=getter, + aggregation=aggregation, + format="long", + ) + ) + except Exception as e: + warnings.warn( + f"Couldn't get scores for {model_res.model_name}({model_res.model_revision}), due to: {e}" ) - ) return entries def __iter__(self): diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 91b84e38d8..f54a085d02 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -2,8 +2,10 @@ import logging from collections.abc import Iterable +from functools import lru_cache from typing import Any +from huggingface_hub import ModelCard from sentence_transformers import SentenceTransformer from mteb.encoder_interface import Encoder @@ -152,21 +154,46 @@ def get_model_meta(model_name: str, revision: str | None = None) -> ModelMeta: return MODEL_REGISTRY[model_name] else: # assume it is a sentence-transformers model logger.info( - "Model not found in model registry, assuming it is a sentence-transformers model." + "Model not found in model registry, assuming it is on HF Hub model." ) logger.info( - f"Attempting to extract metadata by loading the model ({model_name}) using sentence-transformers." + f"Attempting to extract metadata by loading the model ({model_name}) using HuggingFace." 
) - model = SentenceTransformer( - model_name, revision=revision, trust_remote_code=True - ) - meta = model_meta_from_sentence_transformers(model) - + meta = model_meta_from_hf_hub(model_name) meta.revision = revision meta.name = model_name return meta +@lru_cache +def model_meta_from_hf_hub(model_name: str) -> ModelMeta: + try: + card = ModelCard.load(model_name) + card_data = card.data.to_dict() + frameworks = ["PyTorch"] + if card_data.get("library_name", None) == "sentence-transformers": + frameworks.append("Sentence Transformers") + return ModelMeta( + name=model_name, + revision=None, + # TODO + release_date=None, + # TODO: We need a mapping between conflicting language codes + languages=None, + license=card_data.get("license", None), + framework=frameworks, + public_training_data=bool(card_data.get("datasets", None)), + ) + except Exception as e: + logger.warning(f"Failed to extract metadata from model: {e}.") + return ModelMeta( + name=None, + revision=None, + languages=None, + release_date=None, + ) + + def model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta: try: name = ( From 594f643f127cdb6836b11f41c455b51362136240 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 27 Nov 2024 14:08:01 +0000 Subject: [PATCH 25/76] 1.20.2 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6d36d2b7ad..6463f79b49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.1" +version = "1.20.2" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 35245d36248c0105accaace879f4662def52f5c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Wed, 27 Nov 2024 15:37:56 +0100 Subject: [PATCH 26/76] fix: leaderboard only shows models that have ModelMeta (#1508) Filtering for models that have 
metadata --- mteb/leaderboard/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index 913b23c829..c62788fb3e 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -81,7 +81,7 @@ def update_task_info(task_names: str) -> gr.DataFrame: return gr.DataFrame(df, datatype=["markdown"] + ["str"] * (len(df.columns) - 1)) -all_results = load_results().join_revisions() +all_results = load_results().join_revisions().filter_models() # Model sizes in million parameters min_model_size, max_model_size = 0, 10_000 From 92827964011195665486c2741c1e46d40e24855b Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 27 Nov 2024 14:43:43 +0000 Subject: [PATCH 27/76] 1.20.3 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6463f79b49..9ddcdca3b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.2" +version = "1.20.3" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 942f2125dce5534a167416eefe322dcc71dcbcfe Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Thu, 28 Nov 2024 03:03:28 +0500 Subject: [PATCH 28/76] fix: align readme with current mteb (#1493) * align readme with current mteb * align with mieb branch * fix test --- README.md | 9 +++++---- mteb/evaluation/MTEB.py | 5 ++--- tests/test_benchmark/mock_models.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ef87ec4370..3c659bbde5 100644 --- a/README.md +++ b/README.md @@ -46,10 +46,8 @@ from sentence_transformers import SentenceTransformer # Define the sentence-transformers model name model_name = "average_word_embeddings_komninos" -# or directly from huggingface: -# model_name = "sentence-transformers/all-MiniLM-L6-v2" -model = 
SentenceTransformer(model_name) +model = mteb.get_model(model_name) # if the model is not implemented in MTEB it will be eq. to SentenceTransformer(model_name) tasks = mteb.get_tasks(tasks=["Banking77Classification"]) evaluation = mteb.MTEB(tasks=tasks) results = evaluation.run(model, output_folder=f"results/{model_name}") @@ -220,7 +218,10 @@ Note that the public leaderboard uses the test splits for all datasets except MS Models should implement the following interface, implementing an `encode` function taking as inputs a list of sentences, and returning a list of embeddings (embeddings can be `np.array`, `torch.tensor`, etc.). For inspiration, you can look at the [mteb/mtebscripts repo](https://github.com/embeddings-benchmark/mtebscripts) used for running diverse models via SLURM scripts for the paper. ```python +import mteb from mteb.encoder_interface import PromptType +import numpy as np + class CustomModel: def encode( @@ -244,7 +245,7 @@ class CustomModel: pass model = CustomModel() -tasks = mteb.get_task("Banking77Classification") +tasks = mteb.get_tasks(tasks=["Banking77Classification"]) evaluation = MTEB(tasks=tasks) evaluation.run(model) ``` diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 05a3c02ba4..1374c1ce11 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -13,7 +13,7 @@ from typing import Any import datasets -from sentence_transformers import SentenceTransformer +from sentence_transformers import CrossEncoder, SentenceTransformer from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta @@ -23,7 +23,6 @@ from ..abstasks import AbsTask from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper -from ..models.wrapper import Wrapper from ..tasks import * from . 
import LangMapping @@ -363,7 +362,7 @@ def run( meta = self.create_model_meta(model) output_path = self.create_output_folder(meta, output_folder) - if not isinstance(model, Wrapper): + if isinstance(model, (SentenceTransformer, CrossEncoder)): model = SentenceTransformerWrapper(model) if output_path: diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index e0b9cf69df..6b26cf67d4 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -33,7 +33,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): return torch.randn(len(sentences), 10).numpy() -class MockTorchbf16Encoder(mteb.Encoder): +class MockTorchbf16Encoder(SentenceTransformer): def __init__(self): pass From 09f004c96c3e0e12098163a200082944759098e7 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 27 Nov 2024 22:19:06 +0000 Subject: [PATCH 29/76] 1.20.4 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9ddcdca3b9..333232ed9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.3" +version = "1.20.4" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From cfd43aca70173b93a4d1163b1e6afd52eef41372 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Thu, 28 Nov 2024 14:14:28 +0200 Subject: [PATCH 30/76] docs: Add lang family mapping and map to task table (#1486) * add lang family mapping and map to task table * make lint * add back some unclassified lang codes --- docs/create_tasks_table.py | 18 +- docs/tasks.md | 2106 +- mteb/language_family.json | 62611 ++++++++++++++++++++ mteb/languages.py | 7 +- scripts/create_language_family_mapping.py | 47 + 5 files changed, 63732 insertions(+), 1057 deletions(-) create mode 100644 mteb/language_family.json create mode 100644 
scripts/create_language_family_mapping.py diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index 13e9830276..606f349cf2 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -8,6 +8,7 @@ import mteb from mteb.abstasks.TaskMetadata import PROGRAMMING_LANGS, TASK_TYPE +from mteb.languages import ISO_TO_FAM_LEVEL0, ISO_TO_LANGUAGE def author_from_bibtex(bibtex: str | None) -> str: @@ -82,10 +83,21 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: ## Wrangle for polars pl_table_dict = [] for lang, d in table_dict.items(): - d.update({"0-lang": lang}) # for sorting columns + d.update({"0-lang-code": lang}) # for sorting columns pl_table_dict.append(d) - df = pl.DataFrame(pl_table_dict).sort(by="0-lang") + df = pl.DataFrame(pl_table_dict).sort(by="0-lang-code") + df = df.with_columns( + pl.col("0-lang-code") + .replace_strict(ISO_TO_LANGUAGE, default="unknown") + .alias("1-lang-name") + ) + df = df.with_columns( + pl.col("0-lang-code") + .replace_strict(ISO_TO_FAM_LEVEL0, default="Unclassified") + .alias("2-lang-fam") + ) + df = df.with_columns(sum=pl.sum_horizontal(get_args(TASK_TYPE))) df = df.select(sorted(df.columns)) if sort_by_sum: @@ -96,7 +108,7 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: task_names_md = " | ".join(sorted(get_args(TASK_TYPE))) horizontal_line_md = "---|---" * (len(sorted(get_args(TASK_TYPE))) + 1) table = f""" -| Language | {task_names_md} | Sum | +| ISO Code | Language | Family | {task_names_md} | Sum | |{horizontal_line_md}| """ diff --git a/docs/tasks.md b/docs/tasks.md index 194f7ba70f..23bb246cf5 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -608,1060 +608,1060 @@ The following tables give you an overview of the tasks in MTEB.
-| Language | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | Sum | +| ISO Code | Language | Family | BitextMining | Classification | Clustering | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | Sum | |---|------|------|------|------|------|------|------|------|------|------|------|---| -| aai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aaz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| abs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| abt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| abx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ace | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| acf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| acm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| acq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| acr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| acu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| adz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aeb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| aer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| afr | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 10 | -| agd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| agu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aia | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ajp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| aka | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| ake | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| alp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| alq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| als | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| aly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ame | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amh | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | -| amk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| amx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ang | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| anh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| anp | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| anv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aoi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aoj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aom | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| ape | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| apz | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 1 | -| ara | 2 | 12 | 0 | 0 | 0 | 2 | 1 | 9 | 2 | 0 | 0 | 28 | -| arb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 8 | -| are | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| arl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| arn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| arp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| arq | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | -| ars | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| ary | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 7 | -| arz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| asm | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 14 | -| aso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ast | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| ata | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| atb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| atd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| atg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| att | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| auc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| aui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| auy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| avt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| awa | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| awb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| awk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| awx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ayr | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| azb | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| aze | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| azg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| azj | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| azz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bak | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| bam | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| ban | 2 | 2 | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 5 | -| bao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bba | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bbc | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| bbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bdd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bel | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| bem | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| ben | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | 28 | -| beo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ber | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| beu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bew | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| bgc | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| bgs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bgt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bhb | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bhd | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bho | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 6 | -| bhp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| big | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjj | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjn | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| bjp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bjz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bkd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 1 | -| bki | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bkq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bkx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| blw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| blz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bmr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bns | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| boa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bod | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| boj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bos | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| box | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| boy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bqp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bra | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bre | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| brx | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| bsj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bsp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bug | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| buk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bul | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 13 | -| bus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bvd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bvr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 1 | -| bxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| byr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| byx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bzd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bzh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| caa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| caf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| car | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cat | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| cav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cax | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbk | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| cbr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cbv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ceb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| cek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ces | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | -| cgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cha | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| chd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| chf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| chk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| chq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 1 | -| chv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| chz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cjk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| cjo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cjv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ckb | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| cle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| clu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cme | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cmn | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | 44 | -| cmo | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| cni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cnl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cnt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| code | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 37 | -| cof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| con | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cor | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cpb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cpu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cpy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| crh | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| crn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| crx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| csb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cso | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| csy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cta | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cth | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ctp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ctu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 1 | -| cub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cuk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| cym | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | -| daa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dan | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | -| ded | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| deu | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | 58 | -| dgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dgr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| div | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dji | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| djk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| djr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dob | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| doi | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| dop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dtp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dwy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dyu | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 3 | -| dza | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| dzo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| ebk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ell | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | -| emi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| emp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eng | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 92 | 13 | 2 | 1 | 303 | -| enq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| epo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| eri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ese | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| esk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| est | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | -| etr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eus | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| ewe | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| faa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fao | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | -| far | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fas | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | -| ffm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fij | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| fil | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| fin | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | -| fon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| for | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fra | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | -| fry | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fuh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fur | 1 | 1 | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 3 | -| fuv | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| gah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gaz | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| gbm | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| gdn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gdr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| geb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gfk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ghs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gla | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| gle | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| glg | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| glk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| glv | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gmv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gng | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gnw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gom | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| grc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| grn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| gsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| guh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| guj | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | -| gul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gum | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gun | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| guo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 1 | -| gux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gvf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gym | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| gyr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hat | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| hau | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | -| haw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| heb | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | -| heg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hin | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | -| hix | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hlt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hmn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hne | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| hns | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hot | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hrv | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | -| hsb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hui | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hun | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 12 | -| hus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| huu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| huv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 1 | -| hye | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 9 | -| ian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ibo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 12 | -| ido | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ign | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ikk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ikw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ile | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ilo | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| imo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ina | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| inb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ind | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | 21 | -| ino | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| iou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ipi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| isl | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | -| isn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ita | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | -| iws | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ixl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jae | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jav | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 13 | -| jic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jni | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jpn | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | -| jvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kab | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| kac | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| kam | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kan | 6 | 7 | 2 | 0 | 0 | 1 | 0 
| 2 | 1 | 0 | 0 | 19 | -| kaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kas | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| kat | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 10 | -| kaz | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| kbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kbm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kbp | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kdc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kdl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kea | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| kek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ken | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kfg | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kfy | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kgf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kgk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| khk | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| khm | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| khs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| khz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kik | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| kin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 8 | -| kir | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| kiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kiz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kje | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kjs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kkc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| klt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 1 | -| klv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kmg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmr | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| kms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kmu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| knc | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kne | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| knf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| knj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| knv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kon | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kor | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | -| kos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kqa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kqc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kqf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kql | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kqw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| krc | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ksd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ksj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ksr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ktm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kud | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 1 | -| kue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kur | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kvg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kwd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kwj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kyc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kyf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kyg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kyq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kyz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kze | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| kzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lao | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| lat | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| lav | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| lbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lcm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| leu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lex | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lfn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lgl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lij | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| lim | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| lin | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| lit | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| llg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| lmo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 3 | -| ltg | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| ltz | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| lua | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| lug | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| luo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| lus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| lvs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| lww | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| maa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mad | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| mag | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| mai | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| maj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mak | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| mal | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 19 | -| mam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| maq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mar | 7 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | 20 | -| mau | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| max | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| maz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mbt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mcb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mcd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mcf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mco | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mcp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 1 | -| mcq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mcr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mdy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| med | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mek | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| meq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| met | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| meu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mgc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mgh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mgw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mhl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mie | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mig | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mih | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mil | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| min | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | -| mio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| miz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mjc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mkd | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| mkj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mkl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mkn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mks | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mlh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 1 | -| mlp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mlt | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | -| mmo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mmx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mni | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| mon | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| mop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mos | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| mox | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mph | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mpj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mpp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mpx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mqb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mqj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mri | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| msa | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| msb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| msc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| msk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| msm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| msy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mti | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mto | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mui | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| mup | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| mux | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| muy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mwc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 
| -| mwe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mwf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mwp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mwr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mxp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mxq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mxt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mya | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | -| myk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| myu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| myw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| myy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| mzz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| naf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nbl | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nbq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nch | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ncj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ncl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ncu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nde | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ndg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ndj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nds | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nep | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| nfa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ngp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ngu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
nhi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nho | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nhy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nii | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nij | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| nin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nko | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nld | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | -| nlg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nno | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | -| nnq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| noa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nob | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 19 | -| noe | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nor | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | -| not | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nou | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nov | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| npi | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| npl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nqo | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| nsn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nso | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| nss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ntj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ntp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ntu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nus | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| nuy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
nvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nwi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nya | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| nys | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nyu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| obo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| oci | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| okv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| omw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ong | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ons | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ood | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| opm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ori | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| orm | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| orv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ory | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 15 | -| ote | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| otm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| otn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| otq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ots | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| pah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pan | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | -| pao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pap | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| pbt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| pcm | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| pes | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| pib | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pio | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
pir | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| piu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pjt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pls | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| plt | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| plu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pma | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pms | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| poe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| poh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| poi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pol | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | -| pon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| por | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | -| poy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ppo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| prf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| prs | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| ptp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ptu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| pus | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| pwg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qub | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| quc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| quf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| quh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qul | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| quy | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| qvc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qve | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qvm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qvn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
qvs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qvw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qvz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qwh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qxh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qxn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| qxo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| raj | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| reg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rej | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| rgu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rkb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rmc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rom | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| ron | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | 18 | -| roo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rop | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| row | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ruf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| rug | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| run | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| rus | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 | 0 | 52 | -| rwo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sag | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| sah | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| san | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 10 | -| sat | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| sbe | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sbk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sbs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| scn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| 
sco | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| seh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sey | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sgb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sgz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| shi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| shj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| shn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| shp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sin | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| sja | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| slk | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 12 | -| sll | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| slv | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | -| smk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| smo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| sna | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| snc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| snd | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| snn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| snp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| snx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sny | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| som | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | -| soq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sot | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| soy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spa | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | -| spl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sps | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sqi | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| 
srd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| sri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| srm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| srn | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| srp | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | -| srq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ssd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ssg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ssw | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| ssx | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| stp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sun | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | -| sus | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| suz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| svk | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| swa | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | -| swe | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | 22 | -| swg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| swh | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| swp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| sxb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| szl | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| tac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tah | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| taj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tam | 7 | 7 | 2 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | 21 | -| taq | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| tat | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| tav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| taw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tbc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tbf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tbg | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
tbo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tbz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tcz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tdt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tee | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tel | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | 24 | -| ter | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tet | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tew | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tfr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tgk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| tgl | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| tgo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tgp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tha | 4 | 8 | 1 | 0 | 0 | 1 | 1 | 6 | 0 | 0 | 0 | 21 | -| tif | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tir | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| tiw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tiy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tke | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tku | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tlf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tmd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tna | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tnk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tnp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| toc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tod | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tof | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| toj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| 
ton | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| too | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| top | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tpa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tpi | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | -| tpt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| trc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tsn | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| tso | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| tsw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ttc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tte | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tuc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tue | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tuf | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tuk | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | -| tum | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| tuo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tur | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | -| tvk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| twi | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| txq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| txu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tyv | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tzj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tzl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tzm | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| tzo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ubr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ubu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| udu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| uig | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | -| ukr | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | -| uli 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ulk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| umb | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| upv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ura | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| urb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| urd | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 19 | -| uri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| urt | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| urw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| usa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| usp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| uvh | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| uvl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| uzb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| uzn | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | -| vec | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| ven | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| vid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| vie | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 18 | -| viv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| vmy | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| waj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| war | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| wat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wbp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wer | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wiu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wiv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wln | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wmt | 
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wmw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wnc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wnu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wol | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | -| wos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wrk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wro | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wrs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wsk | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wuu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| wuv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xbi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xed | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xho | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | -| xla | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xnn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xsi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xtd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| xtm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yal | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yaq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yby | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ycn | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ydd | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| yid | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yka | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yle | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yml | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yon | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yor | 4 | 
5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 16 | -| yrb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yre | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yss | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yue | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| yuj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yuw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| yva | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zaa | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zab | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zad | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zai | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zaj | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zam | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zao | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zap | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zas | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zav | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zaw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zca | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zga | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zho | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | -| zia | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ziw | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zlm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zos | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpl | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpm | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpu | 1 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpv | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zpz | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zsm | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| zsr | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ztq | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zty | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zul | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | -| zyp | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| Total | None | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 460 | 85 | 2 | 2 | +| aai | Arifama-Miniafia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aak | Ankave | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aau | Abau | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aaz | Amarasi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abs | Ambonese Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abt | Ambulas | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| abx | Inabaknon | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aby | Aneme Wake | Yareban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ace | Achinese | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| acf | Saint Lucian Creole French | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acm | Mesopotamian Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| acq | Ta'izzi-Adeni Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| acr | Achi | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| acu | Achuar-Shiwiar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| adz | Adzera | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aeb | Tunisian Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| aer | Eastern Arrernte | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 1 | +| aey | Amele | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| afr | Afrikaans | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 10 | +| agd | Agarabi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agg | Angor | Senagi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agm | Angaataha | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agn | Agutaynen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agr | Aguaruna | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agt | Central Cagayan Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| agu | Aguacateco | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aia | Arosi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aii | Assyrian Neo-Aramaic | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ajp | South Levantine Arabic | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| aka | Akan | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ake | Akawaio | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alp | Alune | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| alq | Algonquin | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| als | Tosk Albanian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| aly | Alyawarr | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ame | Yanesha' | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amf | Hamer-Banna | South Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amh | Amharic | Afro-Asiatic | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| amk | Ambai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amm | Ama (Papua New Guinea) | Left May | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amn | Amanab | Border | 1 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 1 | +| amo | Amo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | Alamblak | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amr | Amarakaeri | Harakmbut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amu | Guerrero Amuzgo | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amx | Anmatyerre | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ang | Old English (ca. 450-1100) | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anh | Nend | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anp | Angika | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| anv | Denya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoi | Anindilyakwa | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aoj | Mufian | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aom | Ömie | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aon | Bumbita Arapesh | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apb | Sa'a | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apc | Levantine Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ape | Bukiyip | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apn | Apinayé | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apr | Arop-Lokep | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apu | Apurinã | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apw | Western Apache | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| apz | Safeyoka | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ara | Arabic | Unclassified | 2 | 12 | 0 | 0 | 0 | 2 | 1 | 9 | 2 | 0 | 0 | 28 | +| arb | Standard Arabic | Afro-Asiatic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 8 | +| are 
| Western Arrarnta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arl | Arabela | Zaparoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arn | Mapudungun | Araucanian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arp | Arapaho | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| arq | Algerian Arabic | Afro-Asiatic | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | +| ars | Najdi Arabic | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| ary | Moroccan Arabic | Afro-Asiatic | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 7 | +| arz | Egyptian Arabic | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| asm | Assamese | Indo-European | 5 | 3 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 14 | +| aso | Dano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ast | Asturian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ata | Pele-Ata | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atb | Zaiwa | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atd | Ata Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| atg | Ivbie North-Okpela-Arhe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| att | Pamplona Atta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auc | Waorani | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| aui | Anuki | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| auy | Awiyaana | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| avt | Au | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awa | Awadhi | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| awb | Awa (Papua New Guinea) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awk | Awabakal | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| awx | 
Awara | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ayr | Central Aymara | Aymaran | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| azb | South Azerbaijani | Turkic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| aze | Azerbaijani | Unclassified | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| azg | San Pedro Amuzgos Amuzgo | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| azj | North Azerbaijani | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| azz | Highland Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bak | Bashkir | Turkic | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bam | Bambara | Mande | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| ban | Balinese | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bao | Waimaha | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bba | Baatonum | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bbb | Barai | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bbc | Batak Toba | Austronesian | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bbr | Girawa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bch | Bariai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bco | Kaluli | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bdd | Bunama | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bea | Beaver | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bef | Benabena | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bel | Belarusian | Indo-European | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bem | Bemba (Zambia) | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ben | Bengali | Indo-European | 7 | 9 | 2 | 0 | 0 | 1 | 2 | 6 | 1 | 0 | 0 | 28 | +| beo | Beami | Bosavi | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ber | Berber (Other) | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| beu | Blagar | Timor-Alor-Pantar | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bew | Betawi | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| bgc | Haryanvi | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| bgs | Tagabawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bgt | Bughotu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhb | Bhili | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhd | Bhadrawahi | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhg | Binandere | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bhl | Bimin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bho | Bhojpuri | Indo-European | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 6 | +| bhp | Bima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| big | Biangai | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjj | Kanauji | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjk | Barok | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjn | Banjar | Austronesian | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bjp | Fanamaket | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjr | Binumarien | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjv | Bedjond | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bjz | Baruga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkd | Binukid | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bki | Baki | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkq | Bakairí | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bkx | Baikeno | 
Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blw | Balangao | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| blz | Balantak | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmh | Kein | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmk | Ghayavi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmr | Muinane | Boran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bmu | Somba-Siawari | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bnp | Bola | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bns | Bundeli | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boa | Bora | Boran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bod | Tibetan | Sino-Tibetan | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| boj | Anjam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bon | Bine | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bos | Bosnian | Unclassified | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| box | Buamu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| boy | Bodo (Central African Republic) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bpr | Koronadal Blaan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bps | Sarangani Blaan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqc | Boko (Benin) | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bqp | Busa | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bra | Braj | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bre | Breton | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| brx | Bodo (India) | Sino-Tibetan | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| bsj | Bangwinji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 1 | +| bsn | Barasana-Eduria | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bsp | Baga Sitemu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bss | Akoose | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bug | Buginese | Austronesian | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| buk | Bugawac | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bul | Bulgarian | Indo-European | 3 | 4 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 13 | +| bus | Bokobaru | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvd | Baeggu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bvr | Burarra | Maningrida | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bxh | Buhutu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byr | Baruya | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| byx | Qaqet | Baining | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzd | Bribri | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzh | Mapos Buang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| bzj | Belize Kriol English | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caa | Chortí | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cab | Garifuna | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cac | Chuj | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| caf | Southern Carrier | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cak | Kaqchikel | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cao | Chácobo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cap | Chipaya | Uru-Chipaya | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| car | Galibi Carib | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cat | Catalan | Indo-European | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| cav | Cavineña | 
Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cax | Chiquitano | Chiquitano | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbc | Carapana | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbi | Chachi | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbk | Chavacano | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cbr | Cashibo-Cacataibo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbs | Cashinahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbt | Chayahuita | Cahuapanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbu | Candoshi-Shapra | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cbv | Cacua | Kakua-Nukak | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cco | Comaltepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ceb | Cebuano | Austronesian | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| cek | Eastern Khumi Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | +| cgc | Kagayanen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cha | Chamorro | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| chd | Highland Oaxaca Chontal | Tequistlatecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chf | Tabasco Chontal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chk | Chuukese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chq | Quiotepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chv | Chuvash | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| chz | Ozumacín Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjk | Chokwe | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| cjo | Ashéninka Pajonal | Arawakan | 1 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cjv | Chuave | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ckb | Central Kurdish | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| cle | Lealao Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| clu | Caluyanun | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cme | Cerma | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cmn | Mandarin Chinese | Sino-Tibetan | 4 | 10 | 4 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | 44 | +| cmo | Central Mnong | Austroasiatic | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cni | Asháninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnl | Lalana Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cnt | Tepetotutla Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| code | unknown | Unclassified | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 37 | +| cof | Colorado | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| con | Cofán | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cop | Coptic | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cor | Cornish | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cot | Caquinte | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpa | Palantla Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpb | Ucayali-Yurúa Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpc | Ajyíninka Apurucayali | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpu | Pichis Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cpy | South Ucayali Ashéninka | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crh | Crimean Tatar | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| crn | El Nayar Cora | Uto-Aztecan | 1 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| crx | Carrier | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csb | Kashubian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cso | Sochiapam Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| csy | Siyin Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cta | Tataltepec Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cth | Thaiphum Chin | Bookkeeping | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctp | Western Highland Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ctu | Chol | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cub | Cubeo | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuc | Usila Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cui | Cuiba | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cuk | San Blas Kuna | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cut | Teutila Cuicatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cux | Tepeuxila Cuicatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cwe | Kwere | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cya | Nopala Chatino | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cym | Welsh | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| daa | Dangaléat | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dad | Marik | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dah | Gwahatike | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dan | Danish | Indo-European | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | +| ded | Dedua | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 
18 | 4 | 0 | 0 | 58 | +| dgc | Casiguran Dumagat Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dgr | Dogrib | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dgz | Daga | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dhg | Dhangu-Djangu | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dif | Dieri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dik | Southwestern Dinka | Nilotic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| div | Dhivehi | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dji | Djinang | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djk | Eastern Maroon Creole | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| djr | Djambarrpuyngu | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dob | Dobu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| doi | Dogri (macrolanguage) | Unclassified | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dop | Lukpa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dov | Dombe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dsb | Lower Sorbian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dtp | Kadazan Dusun | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwr | Dawro | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dww | Dawawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dwy | Dhuwaya | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dyu | Dyula | Mande | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| dza | Tunzu | Atlantic-Congo | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| dzo | Dzongkha | Sino-Tibetan | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ebk | Eastern Bontok | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eko | Koti | 
Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ell | Modern Greek (1453-) | Indo-European | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | +| emi | Mussau-Emira | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| emp | Northern Emberá | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eng | English | Indo-European | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 92 | 13 | 2 | 1 | 303 | +| enq | Enga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| epo | Esperanto | Artificial Language | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| eri | Ogea | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ese | Ese Ejja | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| esk | Northwest Alaska Inupiatun | Eskimo-Aleut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| est | Estonian | Unclassified | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | +| etr | Edolo | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| eus | Basque | Unclassified | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| ewe | Ewe | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| faa | Fasu | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fai | Faiwol | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fao | Faroese | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | +| far | Fataleka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fas | Persian | Unclassified | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | +| ffm | Maasina Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fij | Fijian | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| fil | Filipino | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fin | Finnish | Uralic | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | +| fon | Fon | Atlantic-Congo | 1 | 1 | 1 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| for | Fore | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | +| fry | Western Frisian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuc | Pulaar | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fue | Borgu Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuf | Pular | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fuh | Western Niger Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| fur | Friulian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| fuv | Nigerian Fulfulde | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gah | Alekano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gai | Borei | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gam | Kandawo | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaw | Nobonob | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gaz | West Central Oromo | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gbm | Garhwali | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| gdn | Umanakaina | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gdr | Wipi | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| geb | Kire | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gfk | Patpatar | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ghs | Guhu-Samane | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gla | Scottish Gaelic | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| gle | Irish | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glg | Galician | Indo-European | 3 | 1 | 1 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| glk | Gilaki | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| glv | Manx | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gmv | Gamo | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gng | Ngangam | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnn | Gumatj | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gnw | Western Bolivian Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gof | Gofa | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gom | Goan Konkani | Indo-European | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| grc | Ancient Greek (to 1453) | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| grn | Guarani | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| gsw | Swiss German | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gub | Guajajára | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guh | Guahibo | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gui | Eastern Bolivian Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guj | Gujarati | Indo-European | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 18 | +| gul | Sea Island Creole English | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gum | Guambiano | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gun | Mbyá Guaraní | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| guo | Guayabero | Guahiboan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gup | Gunwinggu | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gux | Gourmanchéma | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvc | Guanano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvf | Golin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 
+| gvn | Kuku-Yalanji | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gvs | Gumawana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gwi | Gwichʼin | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gym | Ngäbere | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| gyr | Guarayu | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hat | Haitian | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| hau | Hausa | Afro-Asiatic | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 14 | +| haw | Hawaiian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hbo | Ancient Hebrew | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hch | Huichol | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| heb | Hebrew | Afro-Asiatic | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | +| heg | Helong | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | +| hix | Hixkaryána | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hla | Halia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hlt | Matu Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmn | Hmong | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hmo | Hiri Motu | Pidgin | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hne | Chhattisgarhi | Indo-European | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| hns | Caribbean Hindustani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hop | Hopi | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hot | Hote | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hrv | Croatian | Unclassified | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| hsb | Upper Sorbian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 
+| hto | Minica Huitoto | Huitotoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hub | Huambisa | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hui | Huli | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hun | Hungarian | Uralic | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 12 | +| hus | Huastec | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huu | Murui Huitoto | Huitotoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| huv | San Mateo Del Mar Huave | Huavean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hvn | Sabu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| hye | Armenian | Indo-European | 3 | 3 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 9 | +| ian | Iatmul | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ibo | Igbo | Atlantic-Congo | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 12 | +| ido | Ido | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ign | Ignaciano | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikk | Ika | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ikw | Ikwere | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ile | Interlingue | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ilo | Iloko | Austronesian | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| imo | Imbongu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ina | Interlingua (International Auxiliary Language Association) | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| inb | Inga | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ind | Indonesian | Austronesian | 6 | 7 | 1 | 0 | 0 | 1 | 1 | 4 | 1 | 0 | 0 | 21 | +| ino | Inoke-Yate | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| iou | Tuma-Irumu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
ipi | Ipili | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| isl | Icelandic | Indo-European | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| isn | Isanzu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ita | Italian | Indo-European | 5 | 9 | 1 | 0 | 1 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| iws | Sepik Iwam | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ixl | Ixil | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jac | Popti' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jae | Yabem | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jao | Yanyuwa | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jav | Javanese | Austronesian | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 13 | +| jic | Tol | Jicaquean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jid | Bu (Kaduna State) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jiv | Shuar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jni | Janji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | +| jvn | Caribbean Javanese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kab | Kabyle | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kac | Kachin | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kam | Kamba (Kenya) | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kan | Kannada | Dravidian | 6 | 7 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 19 | +| kaq | Capanahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kas | Kashmiri | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| kat | Georgian | Kartvelian | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 10 | +| kaz | Kazakh | Turkic | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| kbc | Kadiwéu | Guaicuruan | 
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbh | Camsá | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbm | Iwal | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kbp | Kabiyè | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kbq | Kamano | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdc | Kutu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kde | Makonde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kdl | Tsikimba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kea | Kabuverdianu | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| kek | Kekchí | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ken | Kenyang | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kew | West Kewa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfg | Kudiya | Dravidian | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kfy | Kumaoni | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgf | Kube | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgk | Kaiwá | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kgp | Kaingang | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khk | Halh Mongolian | Mongolic-Khitan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| khm | Khmer | Austroasiatic | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| khs | Kasua | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| khz | Keapara | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kik | Kikuyu | Atlantic-Congo | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kin | Kinyarwanda | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 8 | +| kir | Kirghiz | Turkic | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| kiw | Northeast Kiwai | Kiwaian 
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kiz | Kisi | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kje | Kisar | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kjs | East Kewa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkc | Odoodee | East Strickland | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kkl | Kosarek Yale | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klt | Nukna | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| klv | Maskelynes | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmb | Kimbundu | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kmg | Kâte | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmh | Kalam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmk | Limos Kalinga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmo | Kwoma | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmr | Northern Kurdish | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| kms | Kamasau | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kmu | Kanite | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knc | Central Kanuri | Saharan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kne | Kankanaey | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knf | Mankanya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knj | Western Kanjobal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| knv | Tabo | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kon | Kongo | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | +| kos | Kosraean | Austronesian | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpf | Komba | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpg | Kapingamarangi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpj | Karajá | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpr | Korafe-Yegha | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpw | Kobon | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kpx | Mountain Koiali | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqa | Mum | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqc | Doromu-Koki | Manubaran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqf | Kakabai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kql | Kyenele | Yuat | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kqw | Kandas | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| krc | Karachay-Balkar | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksd | Kuanua | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksj | Uare | Kwalean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ksr | Borong | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ktm | Kurti | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kto | Kuot | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kud | 'Auhelawa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kue | Kuman (Papua New Guinea) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kup | Kunimaipa | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kur | Kurdish | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| kvg | Kuni-Boazi | Anim | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kvn | Border Kuna | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwd | 
Kwaio | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwf | Kwara'ae | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwi | Awa-Cuaiquer | Barbacoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kwj | Kwanga | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyc | Kyaka | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyf | Kouya | Kru | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyg | Keyagana | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyq | Kenga | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kyz | Kayabí | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kze | Kosena | Bookkeeping | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| kzj | Coastal Kadazan | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lac | Lacandon | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lao | Lao | Tai-Kadai | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lat | Latin | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| lav | Latvian | Indo-European | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| lbb | Label | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lbk | Central Bontok | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lcm | Tungag | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| leu | Kara (Papua New Guinea) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lex | Luang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lfn | Lingua Franca Nova | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lgl | Wala | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lid | Nyindrou | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lif | Limbu | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lij | 
Ligurian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lim | Limburgan | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lin | Lingala | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| lit | Lithuanian | Indo-European | 4 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| llg | Lole | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| lmo | Lombard | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltg | Latgalian | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ltz | Luxembourgish | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| lua | Luba-Lulua | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lug | Ganda | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| luo | Luo (Kenya and Tanzania) | Nilotic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lus | Lushai | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| lvs | Standard Latvian | Unclassified | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| lww | Lewo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maa | San Jerónimo Tecóatl Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mad | Madurese | Austronesian | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| mag | Magahi | Indo-European | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mai | Maithili | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| maj | Jalapa De Díaz Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mak | Makasar | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mal | Malayalam | Dravidian | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 19 | +| mam | Mam | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maq | Chiquihuitlán Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mar | Marathi | Indo-European | 7 | 6 | 
2 | 0 | 0 | 1 | 0 | 2 | 2 | 0 | 0 | 20 | +| mau | Huautla Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mav | Sateré-Mawé | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| max | North Moluccan Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| maz | Central Mazahua | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbb | Western Bukidnon Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbc | Macushi | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbh | Mangseng | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbj | Nadëb | Naduhup | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbl | Maxakalí | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbs | Sarangani Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mbt | Matigsalug Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mca | Maca | Mataguayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcb | Machiguenga | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcd | Sharanahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcf | Matsés | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mco | Coatlán Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcp | Makaa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcq | Ese | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mcr | Menya | Angan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mdy | Male (Ethiopia) | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| med | Melpa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mee | Mengen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mek | Mekeo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meq | Merey | 
Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| met | Mato | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| meu | Motu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mey | Hassaniyya | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgc | Morokodo | Central Sudanic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgh | Makhuwa-Meetto | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mgw | Matumbi | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhl | Mauwake | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mhr | Eastern Mari | Uralic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mib | Atatláhuca Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mic | Mi'kmaq | Algic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mie | Ocotepec Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mig | San Miguel El Grande Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mih | Chayuco Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mil | Peñoles Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| min | Minangkabau | Austronesian | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | +| mio | Pinotepa Nacional Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mir | Isthmus Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mit | Southern Puebla Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| miz | Coatzospan Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mjc | San Juan Colorado Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkd | Macedonian | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| mkj | Mokilese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| 
mkl | Mokole | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mkn | Kupang Malay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mks | Silacayoapan Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mle | Manambu | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlg | Malagasy | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlh | Mape | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlp | Bargam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mlt | Maltese | Afro-Asiatic | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| mmo | Mangga Buang | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mmx | Madak | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mna | Mbula | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mni | Manipuri | Sino-Tibetan | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| mon | Mongolian | Unclassified | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| mop | Mopán Maya | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mos | Mossi | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mox | Molima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mph | Maung | Iwaidjan Proper | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpj | Martu Wangka | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpm | Yosondúa Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpp | Migabac | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mps | Dadibi | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpt | Mian | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mpx | Misima-Panaeati | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqb | Mbuko | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mqj | Mamasa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mri | Maori | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| msa | Malay (macrolanguage) | Unclassified | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| msb | Masbatenyo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msc | Sankaran Maninka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msk | Mansaka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msm | Agusan Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| msy | Aruamu | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mti | Maiwa (Papua New Guinea) | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mto | Totontepec Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mui | Musi | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| mup | Malvi | Indo-European | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| mux | Bo-Ung | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| muy | Muyang | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mva | Manam | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mvn | Minaveha | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwc | Are | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwe | Mwera (Chimwera) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwf | Murrinh-Patha | Southern Daly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwp | Kala Lagaw Ya | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mwr | Marwari | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxb | Tezoatlán Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxp | Tlahuitoltepec Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxq | 
Juquila Mixe | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mxt | Jamiltepec Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mya | Burmese | Sino-Tibetan | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| myk | Mamara Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myu | Mundurukú | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myw | Muyuw | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| myy | Macuna | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mzz | Maiadomu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nab | Southern Nambikuára | Nambiquaran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| naf | Nabak | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nak | Nakanai | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nas | Naasioi | South Bougainville | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbl | South Ndebele | Unclassified | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nbq | Nggem | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nca | Iyo | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nch | Central Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncj | Northern Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncl | Michoacán Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ncu | Chumburung | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nde | North Ndebele | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndg | Ndengereko | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ndj | Ndamba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nds | Low German | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 
| +| nep | Nepali (macrolanguage) | Unclassified | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nfa | Dhao | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngp | Ngulu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ngu | Guerrero Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhe | Eastern Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhg | Tetelcingo Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhi | Zacatlán-Ahuacatlán-Tepetzintla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nho | Takuu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhr | Naro | Khoe-Kwadi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhu | Noone | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhw | Western Huasteca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nhy | Northern Oaxaca Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nif | Nek | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nii | Nii | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nij | Ngaju | Austronesian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nin | Ninzo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nko | Nkonya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | +| nlg | Gela | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nna | Nyangumarta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nno | Norwegian Nynorsk | Unclassified | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | +| nnq | Ngindo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| noa | Woun Meu | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 0 | 1 | +| nob | Norwegian Bokmål | Unclassified | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 19 | +| noe | Nimadi | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nop | Numanggang | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nor | Norwegian | Indo-European | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | +| not | Nomatsiguenga | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nou | Ewage-Notu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nov | Novial | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| npi | Nepali (individual language) | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| npl | Southeastern Puebla Nahuatl | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nqo | N'Ko | Artificial Language | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| nsn | Nehan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nso | Pedi | Atlantic-Congo | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nss | Nali | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntj | Ngaanyatjarra | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntp | Northern Tepehuan | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ntu | Natügu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nus | Nuer | Nilotic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| nuy | Nunggubuyu | Gunwinyguan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nvm | Namiae | Koiarian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nwi | Southwest Tanna | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nya | Nyanja | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| nys | Nyungar | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| nyu | Nyungwe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 
| +| obo | Obo Manobo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| oci | Occitan (post 1500) | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| okv | Orokaiva | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| omw | South Tairora | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ong | Olo | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ons | Ono | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ood | Tohono O'odham | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| opm | Oksapmin | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ori | Oriya (macrolanguage) | Unclassified | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| orm | Oromo | Unclassified | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| orv | Old Russian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ory | Odia | Indo-European | 5 | 4 | 2 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 15 | +| ote | Mezquital Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otm | Eastern Highland Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otn | Tenango Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| otq | Querétaro Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ots | Estado de México Otomi | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pab | Parecís | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pad | Paumarí | Arawan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pag | Pangasinan | Austronesian | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pah | Tenharim | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pam | Pampanga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pan | Panjabi | Indo-European | 6 | 6 | 2 | 0 | 0 | 1 | 0 | 2 
| 1 | 0 | 0 | 18 | +| pao | Northern Paiute | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pap | Papiamento | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| pbt | Southern Pashto | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| pcm | Nigerian Pidgin | Indo-European | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| pes | Iranian Persian | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| pib | Yine | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pio | Piapoco | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pir | Piratapuyo | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| piu | Pintupi-Luritja | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pjt | Pitjantjatjara | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pls | San Marcos Tlacoyalco Popoloca | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| plt | Plateau Malagasy | Austronesian | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| plu | Palikúr | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pma | Paama | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pms | Piemontese | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poe | San Juan Atzingo Popoloca | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poh | Poqomchi' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| poi | Highland Popoluca | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pol | Polish | Indo-European | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | +| pon | Pohnpeian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| poy | Pogolo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ppo | Folopa | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 
+| prf | Paranan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pri | Paicî | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| prs | Dari | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| ptp | Patep | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ptu | Bambam | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| pus | Pushto | Unclassified | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| pwg | Gapapaiwa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qub | Huallaga Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quc | K'iche' | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quf | Lambayeque Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quh | South Bolivian Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qul | North Bolivian Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qup | Southern Pastaza Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| quy | Ayacucho Quechua | Quechuan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| qvc | Cajamarca Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qve | Eastern Apurímac Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvh | Huamalíes-Dos de Mayo Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvm | Margos-Yarowilca-Lauricocha Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvn | North Junín Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvs | San Martín Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvw | Huaylla Wanca Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qvz | Northern Pastaza Quichua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qwh | Huaylas Ancash Quechua | 
Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxh | Panao Huánuco Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxn | Northern Conchucos Ancash Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| qxo | Southern Conchucos Ancash Quechua | Quechuan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rai | Ramoaaina | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| raj | Rajasthani | Unclassified | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| reg | Kara (Tanzania) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rej | Rejang | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| rgu | Ringgou | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rkb | Rikbaktsa | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmc | Carpathian Romani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rmy | Vlax Romani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rom | Romany | Unclassified | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| ron | Romanian | Indo-European | 5 | 6 | 1 | 0 | 1 | 0 | 1 | 3 | 1 | 0 | 0 | 18 | +| roo | Rotokas | North Bougainville | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rop | Kriol | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| row | Dela-Oenale | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rro | Waima | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ruf | Luguru | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| rug | Roviana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| run | Rundi | Atlantic-Congo | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| rus | Russian | Indo-European | 5 | 13 | 6 | 0 | 2 | 4 | 2 | 16 | 4 | 0 | 0 | 52 | +| rwo | Rawa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sab | Buglere | 
Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sag | Sango | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sah | Yakut | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| san | Sanskrit | Indo-European | 5 | 3 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 10 | +| sat | Santali | Austroasiatic | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| sbe | Saliba | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbk | Safwa | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sbs | Subiya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| scn | Sicilian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sco | Scots | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| seh | Sena | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sey | Secoya | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgb | Mag-antsi Ayta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sgz | Sursurunga | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shi | Tachelhit | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shj | Shatt | Dajuic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| shn | Shan | Tai-Kadai | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| shp | Shipibo-Conibo | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sim | Mende (Papua New Guinea) | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sin | Sinhala | Indo-European | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| sja | Epena | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slk | Slovak | Indo-European | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 12 | +| sll | Salt-Yui | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| slv | Slovenian | Indo-European | 3 | 4 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| smk | Bolinao | Austronesian | 1 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| smo | Samoan | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| sna | Shona | Atlantic-Congo | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snc | Sinaugoro | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snd | Sindhi | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| snn | Siona | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snp | Siane | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| snx | Sam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sny | Saniyo-Hiyewe | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| som | Somali | Afro-Asiatic | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| soq | Kanasi | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sot | Southern Sotho | Atlantic-Congo | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| soy | Miyobe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | +| spl | Selepet | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spm | Akukem | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spp | Supyire Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sps | Saposa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| spy | Sabaot | Nilotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sqi | Albanian | Unclassified | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| srd | Sardinian | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| sri | Siriano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srm | Saramaccan | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| srn | Sranan Tongo | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| srp | Serbian | Unclassified | 4 
| 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | +| srq | Sirionó | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssd | Siroi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssg | Seimat | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ssw | Swati | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| ssx | Samberigi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| stp | Southeastern Tepehuan | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sua | Sulka | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sue | Suena | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sun | Sundanese | Austronesian | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 9 | +| sus | Susu | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| suz | Sunwar | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| svk | Slovakian Sign Language | Sign Language | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swa | Swahili (macrolanguage) | Unclassified | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | +| swe | Swedish | Indo-European | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | 22 | +| swg | Swabian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| swh | Swahili (individual language) | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| swp | Suau | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| sxb | Suba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| szl | Silesian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tac | Lowland Tarahumara | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tah | Tahitian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taj | Eastern Tamang | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tam | Tamil | Dravidian | 7 | 7 | 2 | 0 
| 0 | 1 | 0 | 3 | 1 | 0 | 0 | 21 | +| taq | Tamasheq | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tat | Tatar | Turkic | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| tav | Tatuyo | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| taw | Tai | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbc | Takia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbf | Mandara | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbg | North Tairora | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbo | Tawala | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tbz | Ditammari | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tca | Ticuna | Ticuna-Yuri | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tcs | Torres Strait Creole | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tcz | Thado Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tdt | Tetun Dili | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tee | Huehuetla Tepehua | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tel | Telugu | Dravidian | 7 | 7 | 2 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | 24 | +| ter | Tereno | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tet | Tetum | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tew | Tewa (USA) | Kiowa-Tanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tfr | Teribe | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgk | Tajik | Indo-European | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| tgl | Tagalog | Austronesian | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tgo | Sudest | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tgp | Tangoa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tha | Thai | Tai-Kadai | 4 | 8 | 1 | 0 | 0 | 1 | 1 
| 6 | 0 | 0 | 0 | 21 | +| tif | Tifal | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tim | Timbe | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tir | Tigrinya | Afro-Asiatic | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| tiw | Tiwi | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tiy | Tiruray | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tke | Takwane | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tku | Upper Necaxa Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tlf | Telefol | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tmd | Haruai | Piawi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tna | Tacana | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnc | Tanimuca-Retuarã | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnk | Kwamera | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnn | North Tanna | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tnp | Whitesands | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toc | Coyutla Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tod | Toma | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tof | Gizrra | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| toj | Tojolabal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ton | Tonga (Tonga Islands) | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| too | Xicotepec De Juárez Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| top | Papantla Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tos | Highland Totonac | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpa | Taupota | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpi | 
Tok Pisin | Indo-European | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | +| tpt | Tlachichilco Tepehua | Totonacan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tpz | Tinputz | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| trc | Copala Triqui | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tsn | Tswana | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tso | Tsonga | Atlantic-Congo | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| tsw | Tsishingini | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ttc | Tektiteko | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tte | Bwanabwana | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuc | Mutu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tue | Tuyuca | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuf | Central Tunebo | Chibchan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tuk | Turkmen | Turkic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | +| tum | Tumbuka | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tuo | Tucano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | +| tvk | Southeast Ambrym | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| twi | Twi | Unclassified | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| txq | Tii | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| txu | Kayapó | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tyv | Tuvinian | Turkic | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzj | Tz'utujil | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzl | Talossan | Artificial Language | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| tzm | Central Atlas Tamazight | Afro-Asiatic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| tzo | Tzotzil 
| Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubr | Ubir | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ubu | Umbu-Ungu | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| udu | Uduk | Koman | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uig | Uighur | Turkic | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | +| ukr | Ukrainian | Indo-European | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | +| uli | Ulithian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ulk | Meriam Mir | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| umb | Umbundu | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| upv | Uripiv-Wala-Rano-Atchin | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ura | Urarina | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urb | Urubú-Kaapor | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urd | Urdu | Indo-European | 7 | 8 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 19 | +| uri | Urim | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urt | Urat | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| urw | Sop | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usa | Usarufa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| usp | Uspanteco | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvh | Uri | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uvl | Lote | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| uzb | Uzbek | Unclassified | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| uzn | Northern Uzbek | Turkic | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | +| vec | Venetian | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| ven | Venda | Atlantic-Congo | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| vid 
| Vidunda | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vie | Vietnamese | Austroasiatic | 5 | 6 | 1 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 18 | +| viv | Iduna | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| vmy | Ayautla Mazatec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| waj | Waffa | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wal | Wolaytta | Ta-Ne-Omotic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wap | Wapishana | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| war | Waray (Philippines) | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| wat | Kaninuwa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbi | Vwanji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wbp | Warlpiri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wed | Wedau | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wer | Weri | Kunimaipan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wim | Wik-Mungkan | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiu | Wiru | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wiv | Vitu | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wln | Walloon | Indo-European | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmt | Walmajarri | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wmw | Mwani | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wnc | Wantoat | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wnu | Usan | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wol | Wolof | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | +| wos | Hanga Hundi | Ndu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrk | Garrwa | Garrwan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 
| +| wro | Worrorra | Worrorran | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wrs | Waris | Border | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wsk | Waskia | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuu | Wu Chinese | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| wuv | Wuvulu-Aua | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xav | Xavánte | Nuclear-Macro-Je | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xbi | Kombio | Nuclear Torricelli | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xed | Hdi | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xho | Xhosa | Atlantic-Congo | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| xla | Kamula | Kamula-Elevala | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xnn | Northern Kankanay | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xon | Konkomba | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xsi | Sio | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtd | Diuxi-Tilantongo Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| xtm | Magdalena Peñasco Mixtec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaa | Yaminahua | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yad | Yagua | Peba-Yagua | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yal | Yalunka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yap | Yapese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yaq | Yaqui | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yby | Yaweyuha | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ycn | Yucuna | Arawakan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ydd | Eastern Yiddish | Indo-European | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| yid | Yiddish | Unclassified | 1 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yka | Yakan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yle | Yele | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yml | Iamalele | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yon | Yongkom | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yor | Yoruba | Atlantic-Congo | 4 | 5 | 3 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 16 | +| yrb | Yareba | Yareban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yre | Yaouré | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yss | Yessan-Mayo | Sepik | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yue | Yue Chinese | Sino-Tibetan | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| yuj | Karkar-Yuri | Pauwasi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yut | Yopno | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yuw | Yau (Morobe Province) | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| yva | Yawa | Yawa-Saweru | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaa | Sierra de Juárez Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zab | Western Tlacolula Valley Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zac | Ocotlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zad | Cajonos Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zai | Isthmus Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaj | Zaramo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zam | Miahuatlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zao | Ozolotepec Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zap | Zapotec | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zar | Rincón Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zas | Santo Domingo Albarradas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zat | Tabaa Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zav | Yatzachi Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zaw | Mitla Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zca | Coatecas Altas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zga | Kinga | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | +| zia | Zia | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ziw | Zigula | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zlm | Malay (individual language) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zos | Francisco León Zoque | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpc | Choapan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpl | Lachixío Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpm | Mixtepec Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpo | Amatlán Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpq | Zoogocho Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpu | Yalálag Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpv | Chichicapan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zpz | Texmelucan Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zsm | Standard Malay | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | +| zsr | Southern Rincon Zapotec | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| ztq | Quioquitani-Quierí Zapotec | Otomanguean | 1 | 0 | 
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zty | Yatee Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| zul | Zulu | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | +| zyp | Zyphe Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| Total | None | None | None | 1394 | 795 | 304 | 3 | 28 | 67 | 50 | 460 | 85 | 2 | 2 |
diff --git a/mteb/language_family.json b/mteb/language_family.json new file mode 100644 index 0000000000..5770aa6712 --- /dev/null +++ b/mteb/language_family.json @@ -0,0 +1,62611 @@ +{ + "aaa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel" + }, + "aab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Alumu-Akpondu" + }, + "aac": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic", + "level2": "Ari-Waruna" + }, + "aad": { + "level0": "Sepik" + }, + "aae": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk", + "level4": "Southern Tosk" + }, + "aaf": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "aag": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak" + }, + "aah": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu", + "level4": "Bukiyip-Abu" + }, + "aai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "aak": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye" + }, + 
"aal": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 1" + }, + "aam": { + "level0": "Bookkeeping" + }, + "aan": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": "Arawetic" + }, + "aao": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "aap": { + "level0": "Cariban", + "level1": "Pekodian", + "level2": "Xinguan" + }, + "aaq": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Abenaki" + }, + "aar": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Saho-Afar" + }, + "aas": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic" + }, + "aat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk", + "level4": "Southern Tosk" + }, + "aau": { + "level0": "Sepik" + }, + "aaw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "aax": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": 
"Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Mandobo" + }, + "aay": { + "level0": "Bookkeeping" + }, + "aaz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto" + }, + "aba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "abb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Abo-Barombi" + }, + "abc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic" + }, + "abd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Alabat-Manide Agta" + }, + "abe": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Abenaki" + }, + "abf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "abg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria", + "level6": "Unclassified Kamano-Yagaria" + }, + "abh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + 
"level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Central Asian Arabic" + }, + "abi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "abj": { + "level0": "Great Andamanese", + "level1": "South Great Andamanese" + }, + "abk": { + "level0": "Abkhaz-Adyge", + "level1": "Abkhaz-Abaza" + }, + "abl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic" + }, + "abm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Abanyom-Nkem-Nkum" + }, + "abn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "abo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "abp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Abellen-Botolan" + }, + "abq": { + "level0": "Abkhaz-Adyge", + "level1": "Abkhaz-Abaza" + }, + "abr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "abs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Ambonic Malay" + }, + "abt": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Ambulas-Hanga-Hundi" + }, 
+ "abu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Western Tano" + }, + "abv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "abw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Omosan" + }, + "abx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw" + }, + "aby": { + "level0": "Yareban", + "level1": "Doriri-Abia" + }, + "abz": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor", + "level4": "Abuic" + }, + "aca": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Piapoco-Achagua" + }, + "acb": { + "level0": "Bookkeeping" + }, + "acc": { + "level0": "Bookkeeping" + }, + "acd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga", + "level11": "Gikyode-Foodo" + }, + "ace": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic" + }, + "acf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + 
"level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Lesser Antillean French Creole" + }, + "ach": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo" + }, + "aci": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Bo-Cari" + }, + "ack": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Jeru-Kora" + }, + "acl": { + "level0": "Great Andamanese", + "level1": "South Great Andamanese" + }, + "acm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic" + }, + "acn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Achangic" + }, + "acp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga", + "level9": "Sagamuk-Sama-Sambuga" + }, + "acq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "acr": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Quiche-Achi" + }, + "acs": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "acu": { + 
"level0": "Chicham", + "level1": "Shuaric" + }, + "acv": { + "level0": "Palaihnihan" + }, + "acw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "acx": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "acy": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Levantine-Cypriot Arabic" + }, + "acz": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Acheron-Tocho" + }, + "ada": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ga-Dangme" + }, + "adb": { + "level0": "Bookkeeping" + }, + "add": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "ade": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Basila-Adele" + }, + "adf": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic", + "level8": "Dhofaric" + }, + "adg": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte", + "level5": "Central-Eastern Arrernte" + }, + "adh": { + "level0": "Nilotic", + "level1": "Western Nilotic", + 
"level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo", + "level5": "Adhola-Luo" + }, + "adi": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Eastern Tani" + }, + "adj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Agneby" + }, + "adl": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri" + }, + "adn": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "ado": { + "level0": "Ramu", + "level1": "Agoan" + }, + "adp": { + "level0": "Bookkeeping" + }, + "adq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "adr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "North Lembata-Adonara" + }, + "ads": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "adt": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "adu": { + "level0": "Bookkeeping" + }, + "adw": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva", + "level9": "Amondava-Uru-Eu-Wau-Wau" + }, + "adx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "North-Eastern Tibetic" + }, + "ady": { + 
"level0": "Abkhaz-Adyge", + "level1": "Circassian" + }, + "adz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham" + }, + "aea": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + "level3": "Kuthant-Gurdjar", + "level4": "Rib-Gurdjar" + }, + "aeb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Malta-Tunisian Arabic" + }, + "aec": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic" + }, + "aed": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "aee": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Eastern Pashayi" + }, + "aek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic", + "level12": "Haeke-Bwatoo" + }, + "ael": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields" + }, + "aem": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "aen": { + 
"level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "aeq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "aer": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte", + "level5": "Central-Eastern Arrernte" + }, + "aeu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic", + "level9": "Akeuic" + }, + "aew": { + "level0": "Keram", + "level1": "East Keram" + }, + "aex": { + "level0": "Bookkeeping" + }, + "aey": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "aez": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "afb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "afd": { + "level0": "Arafundi" + }, + "afe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "afg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "afh": { + "level0": "Artificial Language" + }, + "afi": { + 
"level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan" + }, + "afk": { + "level0": "Arafundi" + }, + "afn": { + "level0": "Ijoid" + }, + "afo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau" + }, + "afp": { + "level0": "Arafundi" + }, + "afr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch", + "level9": "Afrikaansic" + }, + "afs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah" + }, + "aft": { + "level0": "Nyimang" + }, + "afu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang" + }, + "afz": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "aga": { + "level0": "Unattested", + "level1": "Arawakan (Unattested)" + }, + "agb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic" + }, + "agc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma", + "level7": "Idoma-Agatu-Okpogu" + }, + "agd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Gadsup-Agarabi" + }, + "age": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "agf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "agg": { + "level0": "Senagi" + }, + "agh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian" + }, + "agi": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "agj": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Amharic-Argobba" + }, + "agk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "agl": { + "level0": "East Strickland" + }, + "agm": { + "level0": "Angan" + }, + "agn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Kalamian" + }, + "ago": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye", + "level3": "Tainae-Akoye" + }, + "agq": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic", + "level11": "Aghem-Weh" + }, + "agr": { + "level0": "Chicham" + }, + "ags": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid" + }, + "agt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "agu": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Ixilan" + }, + "agv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon" + }, + "agw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "agx": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi", + "level6": "Aghul-Lezgi", + "level7": "Aghulic" + }, + "agy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran" + }, + "agz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "aha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + 
"level8": "Southern Bia" + }, + "ahb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage", + "level10": "Port Sandwich-Axamb-Avok" + }, + "ahe": { + "level0": "Bookkeeping" + }, + "ahg": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw" + }, + "ahh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu", + "level6": "Mappi-Digul Awyu" + }, + "ahi": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Aizi" + }, + "ahk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "ahl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "ahm": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Aizi" + }, + "ahn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Ayere-Ahan" + }, + "aho": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic" + }, + "ahp": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo" + }, + "ahr": { + "level0": "Bookkeeping" + }, + "ahs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Tinoric" + }, + "aht": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Southern Alaskan Athabaskan" + }, + "aia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "aib": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "aic": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "aid": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Linngithigh-Alngith" + }, + "aie": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage" + }, + "aif": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai", + "level3": "Agi-Yeri" + }, + "aig": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern 
English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua" + }, + "aih": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Mak-Ai-Cham" + }, + "aii": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "aij": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab", + "level12": "Western Trans-Zab" + }, + "aik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Eggon-Ake" + }, + "ail": { + "level0": "Bosavi", + "level1": "Bosavi Watershed" + }, + "aim": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Central Old Kuki" + }, + "ain": { + "level0": "Ainu", + "level1": "Hokkaido-Kuril Ainu" + }, + "aio": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + 
"level10": "Sukaphic", + "level11": "Mogaung", + "level12": "Assam Tai A" + }, + "aip": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok", + "level6": "Kwer-Burumakok" + }, + "aiq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "air": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Samarokena-Airoran" + }, + "ait": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Arikemic" + }, + "aiw": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Aari-Gayil" + }, + "aix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "aiy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic", + "level8": "Ngbaka-Manza-Ali" + }, + "aja": { + "level0": "Kresh-Aja" + }, + "ajg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe" + }, + "aji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New 
Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian", + "level9": "Houailou" + }, + "ajs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "aju": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "ajw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "ajz": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Karbic" + }, + "aka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "akb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern Batak", + "level6": "Angkola-Mandailing" + }, + "akd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross" + }, + "ake": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan", + "level4": "Kapong" + }, + "akf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Yatye-Akpa" + }, + "akg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese", + "level7": "Central Sumbanese" + }, + "akh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": 
"Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "aki": { + "level0": "Ramu", + "level1": "Aian" + }, + "akj": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Jeru-Kora" + }, + "akk": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "East Semitic" + }, + "akl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan" + }, + "akm": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede", + "level2": "Northern Great Andamanese", + "level3": "Bo-Cari" + }, + "akn": { + "level0": "Bookkeeping" + }, + "ako": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan", + "level3": "Tiriyoan" + }, + "akp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Lelemi-Akpafu" + }, + "akq": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "akr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo", + "level9": "Araki-Tangoa" + }, + "aks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma A" + }, + "akt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": 
"Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "aku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Akum-Beezen" + }, + "akv": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Akhvakhic" + }, + "akw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)" + }, + "akx": { + "level0": "Great Andamanese", + "level1": "North Andamanese-Akakede" + }, + "aky": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese", + "level2": "Okol-Opucikwar" + }, + "akz": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "ala": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma" + }, + "alc": { + "level0": "Kawesqar", + "level1": "North Central Alacufan" + }, + "ald": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Avikam-Alladian" + }, + "ale": { + "level0": "Eskimo-Aleut", + "level1": "Aleutic" + }, + "alf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "alh": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "ali": { + 
"level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles" + }, + "alj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "alk": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric" + }, + "all": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "alm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "aln": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian" + }, + "alo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "East Hoamoal" + }, + "alp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram", + "level6": "Ulat Inai" + }, + "alq": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Severn-Algonquin" + }, + "alr": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian" + }, + "als": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Albanian", + "level3": "Albanian-Tosk" + }, + "alt": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast 
Kipchak", + "level5": "East Kipchak" + }, + "alu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "alw": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Kambaataic" + }, + "alx": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "aly": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "alz": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo" + }, + "ama": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": "Arawetic" + }, + "amb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "amc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano" + }, + "amd": { + "level0": "Bookkeeping" + }, + "ame": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha" + }, + "amf": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Hamer-Karo" + }, + "amg": { + "level0": "Iwaidjan Proper" + }, + "amh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + 
"level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Amharic-Argobba" + }, + "ami": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Central East Formosan" + }, + "amj": { + "level0": "Furan" + }, + "amk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "aml": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian" + }, + "amm": { + "level0": "Left May", + "level1": "Western Left May" + }, + "amn": { + "level0": "Border", + "level1": "Warisic" + }, + "amo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos" + }, + "amp": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Eastern Sepik Hill" + }, + "amq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Elpaputi" + }, + "amr": { + "level0": "Harakmbut" + }, + "ams": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Oshima" + }, + "amt": { + "level0": "Amto-Musan" + }, + "amu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "amv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku" + }, + "amw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": 
"Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Western Aramaic" + }, + "amx": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "amy": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda", + "level2": "Ame-Manda" + }, + "amz": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman", + "level4": "Northeast Paman", + "level5": "Uradhic" + }, + "anb": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Arabela-Andoa" + }, + "anc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "and": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "ane": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "anf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kebu-Animere" + }, + "ang": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic" + }, + "anh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram", + "level7": "West Sogeram" + }, + "ani": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "anj": { + "level0": "Ramu", + "level1": "Aian" + }, + "ank": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "anl": { + "level0": "Sino-Tibetan", + "level1": "Mruic" + }, + "anm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Anal-Lamgang" + }, + "ann": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross" + }, + "anp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + }, + "anq": { + "level0": "Jarawa-Onge" + }, + "ans": { + "level0": "Chocoan", + "level1": "Unclassified Chocoan" + }, + "ant": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "anu": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "anv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe", + "level6": "Kendem-Denya" + }, + "anw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear 
Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem" + }, + "anx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "any": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia", + "level9": "Anyinic" + }, + "aoa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese", + "level16": "Saotomic" + }, + "aob": { + "level0": "Anim", + "level1": "Tirio" + }, + "aoc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan" + }, + "aod": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan" + }, + "aoe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Angal Mendi" + }, + "aof": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "aog": { + "level0": "Lower Sepik" + }, + "aoh": { + "level0": "Unattested", + "level1": "Chocoan (Unattested)" + }, + "aoi": { + "level0": "Gunwinyguan", + 
"level1": "Eastern Gunwinyguan", + "level2": "Wubuy-Anindilyakwa" + }, + "aoj": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu" + }, + "aok": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian", + "level9": "Houailou" + }, + "aol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat" + }, + "aom": { + "level0": "Koiarian", + "level1": "Baraic" + }, + "aon": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh" + }, + "aor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "aos": { + "level0": "Border", + "level1": "Taikat-Awyi" + }, + "aot": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "aou": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao", + "level7": "Ahouic" + }, + "aox": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan", + "level3": "Wapishanan", + "level4": "Wapishana-Atorai" + }, + "aoz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto", + "level7": "Central Meto" + }, + "apb": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira" + }, + "apc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Levantine-Cypriot Arabic" + }, + "apd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "ape": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Arapesh", + "level3": "Mufian-Bukiyip-Abu", + "level4": "Bukiyip-Abu" + }, + "apf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon", + "level5": "Paranan-Pahanan" + }, + "apg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Barito-Mahakam" + }, + "aph": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic", + "level7": "Athpariyic" + }, + "api": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva" + }, + "apj": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Eastern Southwestern Apachean" + }, + "apk": { + "level0": 
"Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean" + }, + "apl": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Eastern Southwestern Apachean" + }, + "apm": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "apn": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "apo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "app": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost" + }, + "apq": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese", + "level2": "Okol-Opucikwar" + }, + "apr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "aps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": 
"Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "apt": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani" + }, + "apu": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus" + }, + "apv": { + "level0": "Unattested", + "level1": "Nambiquaran (Unattested)" + }, + "apw": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "apx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai", + "level6": "Perai-Aputai" + }, + "apy": { + "level0": "Cariban" + }, + "apz": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic" + }, + "aqc": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic" + }, + "aqd": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "aqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid" + }, + "aqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "aqm": { + "level0": "Kayagaric" + }, + "aqn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran" + }, + "aqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New 
Caledonian", + "level9": "Houailou" + }, + "aqt": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "aqz": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Corumbiara" + }, + "arb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic" + }, + "arc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic" + }, + "ard": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Arabana-Wangganguru" + }, + "are": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic", + "level4": "Upper Arrernte" + }, + "arg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Unshifted Western Romance" + }, + "arh": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic", + "level5": "Eastern-Southern Arhuacic" + }, + "ari": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai", + "level3": "Pawnee-Arikara" + }, + "arj": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic", + "level6": "Arapaso-Miriti" + }, + "ark": { + "level0": "Nuclear-Macro-Je", + "level1": "Jabuti" + }, + "arl": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Arabela-Andoa" + 
}, + "arn": { + "level0": "Araucanian" + }, + "aro": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik", + "level4": "Araona-Toromono" + }, + "arp": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic", + "level4": "Arapaho-Gros Ventre-Besawunena" + }, + "arq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "arr": { + "level0": "Tupian", + "level1": "Purubora-Ramarama", + "level2": "Ramarama" + }, + "ars": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic" + }, + "aru": { + "level0": "Arawan" + }, + "arv": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "arw": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric" + }, + "arx": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "ary": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Moroccan-Andalusian Arabic" + }, + "arz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic" + }, + "asa": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta", + "level10": "Pareic" + }, + "asb": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Nakoda" + }, + "asc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat" + }, + "ase": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "asf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "asg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "West Kambaric" + }, + "asi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Sabakor" + }, + "asj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane" + }, + "ask": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "asl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay" + }, + "asm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + 
"level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Eastern Kamrupa" + }, + "asn": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V" + }, + "aso": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "asp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "asq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "asr": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Asuric" + }, + "ass": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "ast": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Asturo-Leonese" + }, + "asu": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "asv": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua", + "level2": "Mangbetuic" + }, + "asw": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + 
}, + "asx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup" + }, + "asy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Central-Yaosakor Asmat" + }, + "asz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "atb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "atc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Madre de Dios Pano" + }, + "atd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "Central Manobo" + }, + "ate": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram", + "level7": "West Sogeram" + }, + "atf": { + "level0": "Bookkeeping" + }, + "atg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "ati": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo" + }, + "atj": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "atk": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian" + }, + "atl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inagta Bikol" + }, + "atm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Unclassified Bisayan" + }, + "atn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Vafsic" + }, + "ato": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields", + "level8": "Menka-Atong" + }, + "atp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "atq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "atr": { + "level0": "Cariban", + "level1": "Yawaperi" + }, + "ats": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic", + "level4": "Arapaho-Gros Ventre-Besawunena" + }, + "att": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "atu": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": 
"Dinka-Nuer", + "level3": "Nuer-Reel" + }, + "atv": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Northern Altai-Lower Chulym" + }, + "atw": { + "level0": "Palaihnihan" + }, + "aty": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu" + }, + "atz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon" + }, + "aua": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "aub": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupha-Alugu" + }, + "aud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "aug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "auh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bemba (M.40)" + }, + "aui": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan 
Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage" + }, + "auj": { + "level0": "Afro-Asiatic", + "level1": "Berber" + }, + "auk": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai", + "level2": "Heyo-Yahang" + }, + "aul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula" + }, + "aum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "aun": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "auo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade", + "level7": "Shira" + }, + "aup": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "auq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Anus-Podena" + }, + "aur": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Unclassified Kombio-Yambes" + }, + "aut": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern 
Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal", + "level14": "Tahitian-Austral" + }, + "auu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Auye-Dao" + }, + "auv": { + "level0": "Bookkeeping" + }, + "auw": { + "level0": "Border", + "level1": "Taikat-Awyi" + }, + "aux": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava", + "level7": "Guaja-Aure-Aura" + }, + "auy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Auyana" + }, + "auz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Central Asian Arabic", + "level8": "Xorasan-Qashqa-Darya Arabic" + }, + "ava": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic" + }, + "avb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "avd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + 
"level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic" + }, + "ave": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian" + }, + "avi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Avikam-Alladian" + }, + "avk": { + "level0": "Artificial Language" + }, + "avl": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic" + }, + "avm": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman", + "level4": "Northeast Paman", + "level5": "Uradhic", + "level6": "Yadhaykenu-Angkamuthi" + }, + "avn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo" + }, + "avo": { + "level0": "Unattested", + "level1": "Arawakan (Unattested)" + }, + "avs": { + "level0": "Zaparoan", + "level1": "Zaparo-Abishira" + }, + "avt": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Au-Olo-Elkei" + }, + "avu": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "avv": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava" + }, + "awa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi", + "level9": "Awadhic" + }, + "awb": { + "level0": "Nuclear Trans 
New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Awa-Oweina" + }, + "awc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu" + }, + "awe": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani" + }, + "awg": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Albatross Bay", + "level4": "Anguthimri-Yangathimri-Yuputhimri", + "level5": "Anguthimri-Yangathimri" + }, + "awh": { + "level0": "Bayono-Awbono" + }, + "awi": { + "level0": "Kamula-Elevala", + "level1": "Elevala" + }, + "awk": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Hunter-Hastings" + }, + "awm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "awn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw" + }, + "awo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Awak-Kamo" + }, + "awr": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain" + }, + "aws": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "awt": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup V", + "level6": "Arawetic" + }, + "awu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + 
"level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu", + "level6": "Mappi-Digul Awyu" + }, + "awv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "aww": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "awx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic", + "level4": "Wantoat-Awara" + }, + "awy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "axb": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur" + }, + "axk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "axl": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic", + "level3": "South Arandic" + }, + "axx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "aya": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Bosngun-Awar" + }, + "ayb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "ayc": { + "level0": "Aymaran", + "level1": "Central-Southern Aymara" + }, + "ayd": 
{ + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umbindhamuic" + }, + "aye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Ayere-Ahan" + }, + "ayg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga" + }, + "ayh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic" + }, + "ayi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic", + "level9": "Lenyima-Leyigha" + }, + "ayk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid", + "level7": "Okpe-Akuku-Idesa" + }, + "ayl": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "ayn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "Judeo-Muslim Sanaani Arabic" + }, + "ayo": { + "level0": "Zamucoan", + "level1": "Zamuco-Ayoreo" + }, + "ayp": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + 
"level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Qeltu" + }, + "ayq": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "ayr": { + "level0": "Aymaran", + "level1": "Central-Southern Aymara" + }, + "ays": { + "level0": "Unattested", + "level1": "Austronesian (Unattested)" + }, + "ayt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic" + }, + "ayu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "ayx": { + "level0": "Bookkeeping" + }, + "ayy": { + "level0": "Unattested", + "level1": "Austronesian (Unattested)" + }, + "ayz": { + "level0": "Maybratic" + }, + "aza": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid" + }, + "azb": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "Central Oghuz" + }, + "azd": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl", + "level7": "Durango Nahuatl" + }, + "azg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "azj": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "Central Oghuz" + }, + "azm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Amuzgoan" + }, + "azn": { + "level0": "Uto-Aztecan", + "level1": "Southern 
Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl", + "level7": "Durango Nahuatl" + }, + "azo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "azr": { + "level0": "Bookkeeping" + }, + "azt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Atta" + }, + "azz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "baa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "East Choiseul", + "level11": "Southeast Choiseul" + }, + "bab": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Bainounk" + }, + "bae": { + "level0": "Arawakan", + "level1": "Medio Rio Negro", + "level2": "Bareic" + }, + "baf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam" + }, + "bag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "Sanaga (A.60)" + }, + "bah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah", + "level16": "Bahamian Gullah" + }, + "baj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Karey-Barakai" + }, + "bak": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak", + "level6": "Bashkiric" + }, + "bam": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "ban": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa" + }, + "bao": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan I", + "level4": "Bara-Tatuyo" + }, + "bap": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti", + "level6": "Bantawic" + }, + "bar": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West 
Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch" + }, + "bas": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko", + "level10": "Basaa-Hijuk" + }, + "bau": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "bav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring", + "level9": "Babungoic" + }, + "baw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "bax": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Batuley-Mariri" + }, + "baz": { + "level0": "Bookkeeping" + }, + "bba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": 
"Southern Central Gur" + }, + "bbb": { + "level0": "Koiarian", + "level1": "Baraic", + "level2": "Barai-Namiae" + }, + "bbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern Batak" + }, + "bbd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "bbe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka", + "level7": "Mayogo-Bangba" + }, + "bbf": { + "level0": "Baibai-Fas" + }, + "bbg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira", + "level25": "Sira-Barama" + }, + "bbh": { + "level0": "Austroasiatic", + "level1": "Mangic", + "level2": "Pakanic" + }, + "bbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Abo-Barombi" + }, + "bbj": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "bbk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring" + }, + "bbl": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh" + }, + "bbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic", + "level12": "Bujaic", + "level13": "Budja (C.37)" + }, + "bbn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Bali-Vitu" + }, + "bbo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Bobo" + }, + "bbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "bbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun 
MCNB" + }, + "bbr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "bbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "bbt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "bbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "bbv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "bbw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bbx": { + "level0": "Bookkeeping" + }, + "bby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields" + }, + "bbz": { + "level0": "Bookkeeping" + }, + "bca": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic", + "level3": "South-Central Bai" + }, + "bcb": { + "level0": "Bookkeeping" + }, + "bcc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", 
+ "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi", + "level9": "Southern Balochi-Koroshi" + }, + "bcd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "bce": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun MCNB" + }, + "bcf": { + "level0": "Kiwaian" + }, + "bcg": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "bch": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero" + }, + "bci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "bcj": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Bardic" + }, + "bck": { + "level0": "Bunaban" + }, + "bcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Coastal Bikol" + }, + "bcm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + 
"level8": "Northwest Solomonic", + "level9": "Banoni-Piva" + }, + "bcn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "bco": { + "level0": "Bosavi", + "level1": "Bosavi Watershed", + "level2": "Kaluli-Sunia" + }, + "bcp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Bali-Beeke" + }, + "bcq": { + "level0": "Ta-Ne-Omotic" + }, + "bcr": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric" + }, + "bcs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "bct": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "bcu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "bcv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + 
"level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "bcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana" + }, + "bcx": { + "level0": "Bookkeeping" + }, + "bcy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Bata-Bwatiye" + }, + "bcz": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Bainounk" + }, + "bda": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Bayot" + }, + "bdb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito" + }, + "bdc": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato", + "level3": "Panama-Baudo-Atrato" + }, + "bdd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Bunama-Mwatebu" + }, + "bde": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade" + }, + "bdf": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Biage-Mountain Koiali" + }, + "bdg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Northeast Sabahan" + }, + "bdh": { + "level0": "Central Sudanic", + 
"level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli" + }, + "bdi": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun" + }, + "bdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Bai-Viri" + }, + "bdk": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Southern Samur" + }, + "bdl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Borneo Coast Bajaw" + }, + "bdm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma" + }, + "bdn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua" + }, + "bdo": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Bayo-Morom" + }, + "bdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Unclassified Northeast Savanna Bantu", + "level9": "Bende-Tongwe" + }, + "bdq": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Tampuon-Bahnar" + }, + "bdr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", 
+ "level6": "Borneo Coast Bajaw" + }, + "bds": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic" + }, + "bdt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya" + }, + "bdu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bafawic" + }, + "bdv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "bdw": { + "level0": "West Bomberai", + "level1": "Nuclear West Bomberai" + }, + "bdx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko", + "level5": "Panasuanic" + }, + "bdy": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Inland Bandjalang" + }, + "bdz": { + "level0": "Unattested", + "level1": "Indo-European (Unattested)" + }, + "bea": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Beaver-Sekani" + }, + "beb": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "bec": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A" + }, + "bed": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "bee": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi" + }, + "bef": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "beg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram A" + }, + "beh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental" + }, + "bei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'" + }, + "bej": { + "level0": "Afro-Asiatic", + "level1": "Cushitic" + }, + "bek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": 
"North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Bebeli-Mangseng" + }, + "bel": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "bem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bemba (M.40)" + }, + "ben": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "beo": { + "level0": "Bosavi", + "level1": "Etoro-Bedamini" + }, + "bep": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "beq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic" + }, + "bes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "bet": { + "level0": "Kru", + "level1": "Eastern Kru", + 
"level2": "Beteic", + "level3": "Western Bete" + }, + "beu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "bev": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Western Bete" + }, + "bew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "bex": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Lori" + }, + "bey": { + "level0": "Nuclear Torricelli" + }, + "bez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Bena-Hehe" + }, + "bfa": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian", + "level3": "Nuclear Barian" + }, + "bfb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bfc": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic" + }, + "bfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "bfe": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor", + "level3": 
"Betaf-Vitou" + }, + "bff": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Mbodomo-Bofi" + }, + "bfg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "bfh": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "bfi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL" + }, + "bfj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bfk": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "bfl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bfm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Mmen-Bum" + }, + "bfn": { + "level0": "Timor-Alor-Pantar" + }, + "bfo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": 
"Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric", + "level16": "Birifor" + }, + "bfp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "bfq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada" + }, + "bfr": { + "level0": "Unclassifiable" + }, + "bfs": { + "level0": "Sino-Tibetan", + "level1": "Macro-Bai", + "level2": "Baic", + "level3": "South-Central Bai" + }, + "bft": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Shamskatic" + }, + "bfu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Central-Eastern West Himalayish" + }, + "bfw": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Gutob-Remo" + }, + "bfx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan" + }, + "bfy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi", + "level9": "Awadhic" + }, + "bfz": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "bga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic", + "level7": "Main-Gwamhi" + }, + "bgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic" + }, + "bgc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "bgd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bge": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "bgf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Gbaya Meridional" + }, + "bgg": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa" + }, + "bgh": { + "level0": "Bookkeeping" + }, + "bgi": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Bilic" + }, + "bgj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bgk": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Khao-Bit" + }, + "bgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi" + }, + "bgo": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bgp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic" + }, + "bgq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Bagri-Jandavra" + }, + "bgr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "bgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo" + }, + "bgt": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "Nggelic" + }, + "bgu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mbongno-Mvano" + }, + "bgv": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Yaqayic" + }, + "bgw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "bgx": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "bgy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "bgz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Eastern Saluan-Banggai" + }, + "bha": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Bundeli-Bharia" + }, + "bhb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": 
"Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "bhc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "bhd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic" + }, + "bhe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "bhg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean" + }, + "bhh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic", + "level10": "Tajikic" + }, + "bhi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "bhj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": 
"Kiranti", + "level4": "Northwestern Kiranti", + "level5": "Bahing-Sunwar" + }, + "bhk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inland Bikol" + }, + "bhl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok" + }, + "bhm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "bhn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Bohtan" + }, + "bho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Bhojpuric" + }, + "bhp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata" + }, + "bhq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate", + "level8": "Tukang Besi" + }, + "bhr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + 
"level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Bara-Tanosy" + }, + "bhs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Buwal-Gavar" + }, + "bht": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "bhu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "bhv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "bhw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic", + "level6": "Biak-Roon" + }, + "bhx": { + "level0": "Bookkeeping" + }, + "bhy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "bhz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "bia": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "bib": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa" + }, + "bic": { + "level0": "Bookkeeping" + }, + "bid": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "bie": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "bif": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Jaad" + }, + "big": { + "level0": "Kunimaipan" + }, + "bii": { + "level0": "Bookkeeping" + }, + "bil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Numan" + }, + "bim": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba", + "level14": "Moba-Bimoba" + }, + "bin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid" + }, + "bio": { + "level0": "Kwomtari-Nai" + }, + "bip": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic", + "level16": "Bila-Kaiku" + }, + "biq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "bir": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan", + "level3": "Outer Enga" + }, + "bis": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "bit": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Bahinemic" + }, + "biu": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "biv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": 
"Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric", + "level16": "Birifor" + }, + "biw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "Southern Makaaic" + }, + "bix": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Asuric" + }, + "biy": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "biz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "bja": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic", + "level12": "Bujaic", + "level13": "Budja (C.37)" + }, + "bjb": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "bjc": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "bjd": { + "level0": 
"Bookkeeping" + }, + "bje": { + "level0": "Hmong-Mien", + "level1": "Mienic" + }, + "bjf": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Northwestern Jewish Neo-Aramaic" + }, + "bjg": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bijogo" + }, + "bjh": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Bahinemic" + }, + "bji": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic" + }, + "bjj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "bjk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "bjl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Bola-Bulu" + }, + "bjm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": 
"Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani", + "level9": "Shabak-Bajalani" + }, + "bjn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Banjar-Bukit Malay" + }, + "bjo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "bjq": { + "level0": "Bookkeeping" + }, + "bjr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "bjs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "bjt": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Balanta" + }, + "bju": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bjv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central 
Logone-Chari", + "level7": "Bediondo" + }, + "bjw": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Bakwe-Wane" + }, + "bjx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Northern Kalinga", + "level9": "Northwest Kalinga" + }, + "bjy": { + "level0": "Pama-Nyungan", + "level1": "Rockhampton-Gladstone" + }, + "bjz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Baruga-Doghoro" + }, + "bka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "bkb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Bontok" + }, + "bkc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi", + "level8": "Baka complex" + }, + "bkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon", + "level6": "Bukidnon" + }, + "bkf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Bali-Beeke" + }, + "bkg": { + "level0": "Bookkeeping" + }, + "bkh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko" + }, + "bki": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Baki-Bierebo" + }, + "bkj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "bkk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "bkl": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "bkm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "bkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo 
Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "bko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "bkp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Interieur", + "level12": "Lobalic" + }, + "bkq": { + "level0": "Cariban", + "level1": "Pekodian" + }, + "bkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "South West Greater Barito" + }, + "bks": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Masbate-Sorsogon" + }, + "bkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "bku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + 
"level3": "South Mangyan", + "level4": "Buhid-Taubuid" + }, + "bkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic" + }, + "bkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic" + }, + "bkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto", + "level6": "Meto", + "level7": "Central Meto" + }, + "bky": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "bkz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "bla": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot" + }, + "blc": { + "level0": "Salishan" + }, + "bld": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "ble": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Balanta" + }, + "blf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "blg": { + "level0": "Bookkeeping" + }, + "blh": { + "level0": "Kru", + "level1": 
"Greater Western Kru" + }, + "bli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Bolia-Ntomba" + }, + "blj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan" + }, + "blk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen" + }, + "bll": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan", + "level2": "Southeastern Siouan" + }, + "blm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli" + }, + "bln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Coastal Bikol" + }, + "blo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Basila-Adele" + }, + "blp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel", + "level11": "Zazao-Blanga" + }, + "blq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty", + "level7": "Lou-Paluai" + }, + "blr": { + "level0": "Austroasiatic", + "level1": 
"Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Bulangic" + }, + "bls": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "blt": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Black Tai" + }, + "blu": { + "level0": "Bookkeeping" + }, + "blv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "blw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran" + }, + "blx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Mag-Ayta" + }, + "bly": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental" + }, + "blz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Eastern Saluan-Banggai" + }, + 
"bma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "bmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Bembe-Buyu" + }, + "bmc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam" + }, + "bmd": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi" + }, + "bmf": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Bullom", + "level4": "Northern Bullom" + }, + "bmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "bmh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "bmi": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic" + }, + "bmj": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "bmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Boanaki-Paiwa" + }, + "bml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bombomic" + }, + "bmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic", + "level8": "Tsimihety-Betsimisaraka" + }, + "bmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "bmo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": 
"Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "bmp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Unclassified Nuclear Warup" + }, + "bmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu" + }, + "bmr": { + "level0": "Boran" + }, + "bms": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "bmt": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "bmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu", + "level6": "Kosorong-Burum-Mindik" + }, + "bmv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Mmen-Bum" + }, + "bmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)" + }, + "bmx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "bmy": { + "level0": "Bookkeeping" + }, + "bmz": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "bna": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate" + }, + "bnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "bnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser" + }, + "bne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "bnf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "East Rivers Seram" + }, + "bng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic" + }, + "bnh": { + "level0": "Bookkeeping" + }, + "bni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala" + }, + "bnj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan", + "level4": "Buhid-Taubuid", + "level5": "Batangan" + }, + "bnk": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Baki-Bierebo" + }, + "bnl": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Unclassified East Cushitic" + }, + "bnm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic" + }, + "bnn": { + "level0": "Austronesian" + }, + "bno": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan" + }, + "bnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Bola-Bulu" + }, + "bnq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Southern Sangiric" + }, + "bnr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "bns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": 
"Bundeli-Bharia" + }, + "bnu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Nuclear Makassaric" + }, + "bnv": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "bnw": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "bnx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "bny": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan" + }, + "bnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Akum-Beezen", + "level6": "Beezen-Baazem" + }, + "boa": { + "level0": "Boran" + }, + "bob": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Karre-Boni" + }, + "boc": { + "level0": "Bookkeeping" + }, + "bod": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan" + }, + "boe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi", + "level6": "Ji" + }, + "bof": { + "level0": "Mande", + "level1": "Western 
Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "bog": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "boh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Kwa-Kasai North" + }, + "boi": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "boj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Coastal Minjim" + }, + "bok": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Impfondoic" + }, + "bol": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele", + "level11": "Bolanci-Bele" + }, + "bom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom", + "level6": "Cara-Berom" + }, + "bon": { + "level0": "Eastern Trans-Fly" + }, + "boo": { + "level0": "Mande", + 
"level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo", + "level5": "Nuclear Bozo", + "level6": "Ti-Bozo" + }, + "bop": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Kewieng-Bonkiman-Nokopo" + }, + "bor": { + "level0": "Bororoan", + "level1": "Bororo-Otuke" + }, + "bos": { + "level0": "Indo-European", + "level1": "Balto-Slavic", + "level2": "Slavic" + }, + "bot": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi" + }, + "bou": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Bondei-Shambala" + }, + "bov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "bow": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "box": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + "boy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan", + "level16": "Unclassified Ngendan" + }, + 
"boz": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo", + "level5": "Nuclear Bozo", + "level6": "Ti-Bozo" + }, + "bpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym", + "level8": "West Ambrym", + "level9": "Southwest Ambrym" + }, + "bpb": { + "level0": "Unattested", + "level1": "Barbacoan (Unattested)" + }, + "bpc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung", + "level10": "Cung-Mbuk" + }, + "bpd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bpe": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" + }, + "bpg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi" + }, + "bph": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Botlikh-Godoberi" + }, + "bpi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "bpj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "bpk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "bpl": { + "level0": "Pidgin", + "level1": "Malay-based pidgin" + }, + "bpm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Unclassified Rai Coast" + }, + "bpn": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Zaominic" + }, + "bpo": { + "level0": "Bookkeeping" + }, + "bpp": { + "level0": "Kaure-Kosare" + }, + "bpq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Ambonic Malay" + }, + "bpr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan", + "level4": "Blaan" + }, + "bps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan", + "level4": "Blaan" + }, + "bpt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Flinders-Barrow" + }, + "bpu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Coastal Minjim" + }, + "bpv": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Marindic" + }, + "bpw": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Iteri-Bo" + }, + "bpx": { + "level0": "Indo-European", 
+ "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Rathawi-Palya" + }, + "bpy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "bpz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote", + "level6": "Southeast Rote" + }, + "bqa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Chumbuli" + }, + "bqb": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "bqc": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "bqd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Konja" + }, + "bqf": { + "level0": "Bookkeeping" + }, + "bqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + 
"level10": "Bago-Delo-Cala" + }, + "bqh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Eastern Tibetic" + }, + "bqi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic", + "level8": "Luric", + "level9": "Bakhtiari-Southern Lori" + }, + "bqj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Gusilay-Bandial" + }, + "bqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "bql": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Karian-Usan-Yaben" + }, + "bqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bakweric" + }, + "bqn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "bqo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "bqp": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "bqq": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "bqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu" + }, + "bqs": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Bosngun-Awar" + }, + "bqt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun", + "level10": "Nun MCNB" + }, + "bqu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan", + "level16": "Unclassified Ngendan" + }, + "bqv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Tinoric" + }, + "bqw": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bqx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "bqy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "bqz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba", + "level11": "Central Manenguba" + }, + "bra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi" + }, + "brb": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "brc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "brd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Thangmi-Baram" + }, + "bre": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic", + 
"level8": "Middle-Modern Southwestern Brythonic", + "level9": "Bretonic" + }, + "brf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic" + }, + "brg": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan" + }, + "brh": { + "level0": "Dravidian", + "level1": "North Dravidian" + }, + "bri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bakweric" + }, + "brj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Bieria-Maii" + }, + "brk": { + "level0": "Nubian", + "level1": "Central Nubian" + }, + "brl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho" + }, + "brm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": 
"Barambo-Pambia" + }, + "brn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan" + }, + "bro": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "brp": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei" + }, + "brq": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Breri-Romkun" + }, + "brr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Southeast Guadalcanal" + }, + "brs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "brt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "bru": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern Bru-Katang" + }, + "brv": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Western Bru-So" + }, + "brw": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian" + }, + "brx": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro", + "level5": "Bodo-Mech-Kachari" + }, + "bry": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Burui-Gaikundi" + }, + "brz": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "bsb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "bsc": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda", + "level3": "Bassari-Bedik-Bapen" + }, + "bsd": { + "level0": "Bookkeeping" + }, + "bse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring" + }, + "bsf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "bsg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "bsh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "bsi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba" + }, + "bsj": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "bsl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba", + "level8": "Gumna-Kontagora" + }, + "bsm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen", + "level8": "Serui-Busami" + }, + "bsn": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Barasano-Eduria-Macuna" + }, + "bso": { + "level0": "Bookkeeping" + }, + "bsp": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "bsq": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + }, + "bsr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba", + "level8": "Gumna-Kontagora" + }, + "bss": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Manenguba", + "level11": "Central Manenguba" + }, + "bst": { + "level0": 
"Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo" + }, + "bsu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "bsv": { + "level0": "Bookkeeping" + }, + "bsw": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Baiso-Jiiddu" + }, + "bsx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe" + }, + "bsy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "bsz": { + "level0": "Bookkeeping" + }, + "bta": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Bata-Bwatiye" + }, + "btb": { + "level0": "Bookkeeping" + }, + "btc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa" + }, + "btd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bte": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": 
"Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Ningic" + }, + "btf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "btg": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Eastern Bete" + }, + "bth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh", + "level5": "Central-Western Bidayuh" + }, + "bti": { + "level0": "Geelvink Bay", + "level1": "Burate-Wate" + }, + "btj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay", + "level9": "Brunei-Bacan Malay" + }, + "btl": { + "level0": "Bookkeeping" + }, + "btm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak", + "level5": "Southern Batak", + "level6": "Angkola-Mandailing" + }, + "btn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan", + "level7": "Datagnon-Santa Teresa-Semirara" + }, + "bto": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol", + "level5": "Inland Bikol" + }, + "btp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic 
linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic" + }, + "btq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek", + "level6": "Batekic" + }, + "btr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "bts": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Central-Southern Batak" + }, + "btt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bete-Obanliku" + }, + "btu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "North Tivoid" + }, + "btv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani", + "level10": "Bateri-Mankiyali" + }, + "btw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Butuan-Tausug" + }, + "btx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bty": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "East Rivers Seram" + }, + "btz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Batakic", + "level4": "Northern Batak" + }, + "bub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": "Bua-Lua", + "level7": "Ba-Korom" + }, + "buc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic" + }, + "bud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma A" + }, + "buf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "bug": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Bugis" + }, + "buh": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo" + }, + "bui": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + 
"level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Mokiba-Ngando" + }, + "buj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa", + "level7": "Kontagora-Gumna-Koromba" + }, + "buk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "North Huon Gulf linkage" + }, + "bul": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic", + "level6": "Macedo-Bulgarian" + }, + "bum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "bun": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Bullom" + }, + "buo": { + "level0": "South Bougainville", + "level1": "Buinic", + "level2": "Buin" + }, + "bup": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan" + }, + "buq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Manep-Barem" + }, + "bus": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": 
"Samo-Busa", + "level4": "Busan", + "level5": "Boko-Busa" + }, + "but": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg" + }, + "buu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo" + }, + "buv": { + "level0": "Yuat", + "level1": "Miyak-Bun-Biwat", + "level2": "Bun-Mundukumo" + }, + "buw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "bux": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic" + }, + "buy": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Bullom", + "level4": "Northern Bullom" + }, + "buz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "bva": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "Barainic" + }, + "bvb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi" + }, + "bvc": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "bvd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "bve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay" + }, + "bvf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Miltuic" + }, + "bvg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)" + }, + "bvh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "bvi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Bai-Viri" + }, + "bvj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + 
"level4": "Ogonoid", + "level5": "West Ogonoid" + }, + "bvk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "bvl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "bvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring" + }, + "bvn": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg" + }, + "bvo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Bolgo-Koke" + }, + "bvp": { + "level0": "Bookkeeping" + }, + "bvq": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Unclassified Membi-Mangbutu-Efe" + }, + "bvr": { + "level0": "Maningrida", + "level1": "Bureran" + }, + "bvs": { + "level0": "Bookkeeping" + }, + "bvt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Geser-Gorom-Bati" + }, + "bvu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Banjar-Bukit Malay" + }, + "bvw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "bvx": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha" + }, + "bvy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "bvz": { + "level0": "Geelvink Bay" + }, + "bwa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic", + "level12": "Haeke-Bwatoo" + }, + "bwb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Western Fijian" + }, + "bwc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi" + }, + "bwd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Bwaidoka-Iduna" + }, + "bwe": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Geba-Bwe" + }, + "bwf": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Boselewa-Galeya" + }, + "bwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "bwh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "bwi": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano", + "level5": "Baniwa-Curripaco" + }, + "bwj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + "bwk": { + "level0": "Mailuan", + "level1": "Bauwakic" + }, + "bwl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala" + }, + "bwm": { + "level0": "Yuat", + "level1": "Miyak-Bun-Biwat", + "level2": "Bun-Mundukumo" + }, + "bwn": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo", + "level3": "Paheng" + }, + "bwo": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid" + }, + "bwp": { + "level0": 
"Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Mandobo" + }, + "bwq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Bobo" + }, + "bwr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "bws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bombomic" + }, + "bwt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": "Bafawic-Bakweric", + "level11": "Bafawic" + }, + "bwu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Buli-Koma" + }, + "bwv": { + "level0": "Bookkeeping" + }, + "bww": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + 
"level14": "Middle Bomokandian", + "level15": "Late Bomokandian", + "level16": "Pagabeteic" + }, + "bwx": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Bu-Nao-Bunu" + }, + "bwy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Bwamu", + "level8": "Nuclear Bwamu" + }, + "bwz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Lumbu-Bwisi" + }, + "bxa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "bxb": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "bxc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Unclassified Bengaic" + }, + "bxd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + 
"level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "bxf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Label-Bilur" + }, + "bxg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala", + "level15": "Lingala-Bangala" + }, + "bxh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "bxi": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric" + }, + "bxj": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara" + }, + "bxk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Northern Luyia" + }, + "bxl": { + "level0": "Mande", + "level1": "Western Mande", + "level2": 
"Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jeri" + }, + "bxm": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Buriat" + }, + "bxn": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara", + "level4": "Thalanyji-Burduna" + }, + "bxo": { + "level0": "Pidgin", + "level1": "Hausa-based pidgin" + }, + "bxp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "bxq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele", + "level11": "Bolanci-Bele" + }, + "bxr": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Buriat" + }, + "bxs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "bxt": { + "level0": "Bookkeeping" + }, + "bxu": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Buriat" + }, + "bxv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": 
"Morom-Jaya-Naba", + "level7": "Naba-Berakou" + }, + "bxw": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "bxx": { + "level0": "Bookkeeping" + }, + "bxz": { + "level0": "Mailuan", + "level1": "Binaharic" + }, + "bya": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic", + "level5": "Batak-Central Tagbanwa" + }, + "byb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid" + }, + "byc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono" + }, + "byd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati" + }, + "bye": { + "level0": "Sepik", + "level1": "Ram", + "level2": "Pouye-Karawa" + }, + "byf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Bete-Lufu" + }, + "byg": { + "level0": "Dajuic", + "level1": "Western Dajuic", + "level2": "Nyala Dajuic" + }, + "byh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Chepangic" + }, + "byi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Bembe-Buyu" + }, + "byj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "byk": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Lakkia-Biao", + "level3": "Biaoic" + }, + "byl": { + "level0": "Bayono-Awbono" + }, + "bym": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Bidyaric" + }, + "byn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw", + "level4": "Northeastern Agaw" + }, + "byo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "byp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic" + }, + "byq": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "byr": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Baruya-Simbari" + }, + "bys": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Burak-Loo" + }, + "byt": { + "level0": "Saharan", + "level1": "Eastern Saharan" + }, + "byv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "byw": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + 
"level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic", + "level7": "Athpariyic" + }, + "byx": { + "level0": "Baining" + }, + "byy": { + "level0": "Bookkeeping" + }, + "byz": { + "level0": "Ramu" + }, + "bza": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Bandi-Zialo" + }, + "bzb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai" + }, + "bzc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Central-Eastern Malagasic" + }, + "bzd": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan", + "level4": "Viceitic" + }, + "bze": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo", + "level5": "Nuclear Bozo" + }, + "bzf": { + "level0": "Ndu" + }, + "bzg": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian", + "level2": "Central Western Plains" + }, + "bzh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mapos-Mangga-Wagau" + }, + "bzi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + 
"level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "bzj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English", + "level15": "Belize-Miskito Creole English" + }, + "bzk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English", + "level15": "Belize-Miskito Creole English" + }, + "bzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tolitoli" + }, + "bzm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic" + }, + "bzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "East Hoamoal" + }, + "bzp": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head", + "level2": "Kemberanic" + }, 
+ "bzq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "bzr": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Eastern Maric" + }, + "bzs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "bzt": { + "level0": "Artificial Language" + }, + "bzv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung" + }, + "bzw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Basa" + }, + "bzx": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Bozo" + }, + "bzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bete-Obanliku" + }, + "bzz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand" + }, + "caa": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chorti-Cholti" + }, + "cab": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric", + "level4": "Island Carib-Garifuna" 
+ }, + "cac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Chujean" + }, + "cad": { + "level0": "Caddoan" + }, + "cae": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar", + "level4": "Noon-Lehar" + }, + "caf": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric", + "level5": "Dakelh" + }, + "cag": { + "level0": "Mataguayan", + "level1": "Mataguayo I" + }, + "cah": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Cahuarano-Iquito" + }, + "cak": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Cakchiquel-Tzutujil", + "level6": "Kaqchikelic" + }, + "cal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian" + }, + "cam": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Cem-Pac" + }, + "can": { + "level0": "Lower Sepik" + }, + "cao": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Bolivian Nawa" + }, + "cap": { + "level0": "Uru-Chipaya" + }, + "caq": { + 
"level0": "Austroasiatic", + "level1": "Nicobaric" + }, + "car": { + "level0": "Cariban", + "level1": "Guianan" + }, + "cat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance" + }, + "cav": { + "level0": "Pano-Tacanan", + "level1": "Tacanan" + }, + "caw": { + "level0": "Speech Register", + "level1": "Quechua-Puquina" + }, + "cax": { + "level0": "Chiquitano" + }, + "cay": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "cbb": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia" + }, + "cbc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Pisamira-Carapana" + }, + "cbd": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan" + }, + "cbe": { + "level0": "Bookkeeping" + }, + "cbg": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic" + }, + "cbh": { + "level0": "Bookkeeping" + }, + "cbi": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan", + "level2": "Cayapa-Colorado" + }, + "cbj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede" + }, + "cbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western 
Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic", + "level14": "Ternate-Zamboanga-Cavite" + }, + "cbl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "cbm": { + "level0": "Bookkeeping" + }, + "cbn": { + "level0": "Austroasiatic", + "level1": "Monic" + }, + "cbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga" + }, + "cbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji", + "level6": "Oleran" + }, + "cbr": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano" + }, + "cbs": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano" + }, + "cbt": { + "level0": "Cahuapanan" + }, + "cbv": { + "level0": "Kakua-Nukak" + }, + "cbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "cby": { + "level0": "Unclassifiable" + }, + "cca": { + "level0": "Bookkeeping" + }, + "ccc": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Chamicuro-Morike" + }, + "ccd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + 
"level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Brazil-Portugal Portuguese" + }, + "cce": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Chopi (S.60)" + }, + "ccg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare" + }, + "cch": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Piti-Atsam" + }, + "ccj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Buy" + }, + "ccl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "ccm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + 
"level5": "Vehicular Malay" + }, + "cco": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Chinantec Group V" + }, + "ccp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "ccq": { + "level0": "Bookkeeping" + }, + "ccr": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Matagalpan" + }, + "ccx": { + "level0": "Bookkeeping" + }, + "ccy": { + "level0": "Bookkeeping" + }, + "cda": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Eastern Tibetic" + }, + "cde": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "cdf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Central Old Kuki" + }, + "cdg": { + "level0": "Bookkeeping" + }, + "cdh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "cdi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "cdj": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi" + }, + "cdm": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Chepangic" + }, + "cdn": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi" + }, + "cdo": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "cdr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku" + }, + "cds": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "cdy": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Maonan-Chadong" + }, + "cdz": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "cea": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Coastal Tsamosan" + }, + "ceb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan" + }, + "ceg": { + "level0": "Zamucoan" + }, + "cek": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cen": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "ces": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Czech-Slovak" + }, + "cey": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "cfa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic" + }, + "cfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom", + "level6": "Cara-Berom" + }, + "cfg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "cfm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin", + "level6": "Falamic" + }, + "cga": { + "level0": "Yuat" + }, + "cgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo" + }, + "cgg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nkore-Kiga" + }, + "cgk": 
{ + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "cha": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "chb": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Chibcha-Duit" + }, + "chc": { + "level0": "Siouan", + "level1": "Catawban" + }, + "chd": { + "level0": "Tequistlatecan" + }, + "che": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh", + "level2": "Chechen-Ingush" + }, + "chf": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chol-Chontal" + }, + "chg": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan" + }, + "chh": { + "level0": "Chinookan", + "level1": "Lower Chinookan" + }, + "chj": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "chk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Mortlockese-Trukese" + }, + "chl": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan", + "level4": "Cahuilla-Cupeno" + }, + "chn": { + "level0": "Chinookan", + "level1": "Lower Chinookan" + }, + "cho": { + "level0": "Muskogean", + "level1": 
"Western Muskogean" + }, + "chp": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan" + }, + "chq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Chinantec Group V" + }, + "chr": { + "level0": "Iroquoian" + }, + "chs": { + "level0": "Bookkeeping" + }, + "cht": { + "level0": "Hibito-Cholon" + }, + "chu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic" + }, + "chv": { + "level0": "Turkic", + "level1": "Bolgar" + }, + "chw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Chuwabo-Maindo" + }, + "chx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Thakali-Chantyal" + }, + "chy": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian" + }, + "chz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group III" + }, + "cia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic" + }, + "cib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": 
"Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "cic": { + "level0": "Muskogean", + "level1": "Western Muskogean" + }, + "cie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic", + "level7": "Gudufic" + }, + "cih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic", + "level11": "Chinali-Lahul Lohar" + }, + "cik": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "cim": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, + "cin": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "cip": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Manguean" + }, + "cir": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "cit": { + "level0": "Bookkeeping" + }, + "ciw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": 
"Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ciy": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Cumana" + }, + "cja": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Cham" + }, + "cje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "cjh": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Inland Tsamosan" + }, + "cji": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "cjk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Chokwe-Lwena" + }, + "cjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Cham" + }, + "cjn": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Wogamusin-Chenapian" + }, + "cjo": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Asheninka" + }, + "cjp": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan", + 
"level4": "Viceitic" + }, + "cjr": { + "level0": "Bookkeeping" + }, + "cjs": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Yenisey Turkic" + }, + "cjv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Chuave-Nomane" + }, + "cjy": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese" + }, + "cka": { + "level0": "Bookkeeping" + }, + "ckb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "ckc": { + "level0": "Bookkeeping" + }, + "ckd": { + "level0": "Bookkeeping" + }, + "cke": { + "level0": "Bookkeeping" + }, + "ckf": { + "level0": "Bookkeeping" + }, + "ckh": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish" + }, + "cki": { + "level0": "Bookkeeping" + }, + "ckj": { + "level0": "Bookkeeping" + }, + "ckk": { + "level0": "Bookkeeping" + }, + "ckl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "ckn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "cko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "ckq": { + 
"level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "ckr": { + "level0": "Baining" + }, + "cks": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French" + }, + "ckt": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic" + }, + "cku": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "ckv": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "ckw": { + "level0": "Bookkeeping" + }, + "ckx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "cky": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip" + }, + "ckz": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + "level5": "Cakchiquel-Tzutujil", + "level6": "Kaqchikelic" + }, + "cla": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "clc": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan" 
+ }, + "cld": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "cle": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "clh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani" + }, + "cli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic", + "level11": "Chakali-Tamprusi-Vagala", + "level12": "Chakali-Tamprusi" + }, + "clj": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "clk": { + "level0": "Sino-Tibetan", + "level1": "Digarish" + }, + "cll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + "level10": "Bago-Delo-Cala", + "level11": "Delo-Cala" + }, + "clm": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Straits Salish" + }, + 
"clo": { + "level0": "Tequistlatecan", + "level1": "Eastern Tequistlatecan" + }, + "clt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "clu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan", + "level7": "Datagnon-Santa Teresa-Semirara" + }, + "clw": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Northern Altai-Lower Chulym" + }, + "cly": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cma": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Koho-Maa" + }, + "cme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kirma-Tyurama" + }, + "cmi": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan", + "level3": "Upper San Juan" + }, + "cml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Bugis" + }, + "cmn": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic" + }, + "cmo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + 
"level6": "Southern-Central Mnong" + }, + "cmr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cms": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "cmt": { + "level0": "Speech Register", + "level1": "Zulu-Sotho" + }, + "cna": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "cnb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic" + }, + "cnc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Phunoi-Coong" + }, + "cng": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Qiang", + "level5": "Upstream-Nu Qiang" + }, + "cnh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Lai Chin" + }, + "cni": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha" + }, + "cnk": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Khomic" + }, + "cnl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "cnm": { + "level0": 
"Bookkeeping" + }, + "cno": { + "level0": "Bookkeeping" + }, + "cnp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua", + "level5": "Pinghua" + }, + "cnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung", + "level10": "Cung-Mbuk" + }, + "cns": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Central-Yaosakor Asmat" + }, + "cnt": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group II" + }, + "cnu": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "cnw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "coa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "cob": { + "level0": "Mayan", + "level1": "Huastecan Mayan" + }, + "coc": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Delta-Californian Yuman" + }, + "cod": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III", + "level7": "Omagua-Kokama" + }, + "coe": { + "level0": "Tucanoan", + 
"level1": "Western Tucanoan", + "level2": "Koreguaje-Tama" + }, + "cof": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan", + "level2": "Cayapa-Colorado" + }, + "cog": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic" + }, + "coh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "coj": { + "level0": "Cochimi-Yuman", + "level1": "Cochimic" + }, + "cok": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol", + "level4": "Coran" + }, + "col": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish" + }, + "com": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "coo": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "cop": { + "level0": "Afro-Asiatic", + "level1": "Egyptian" + }, + "coq": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "cor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic", + "level8": "Middle-Modern Southwestern Brythonic" + }, + "cos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": 
"Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Corsic" + }, + "cot": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak" + }, + "cou": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda" + }, + "cov": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic", + "level5": "Northern Kam" + }, + "cow": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Inland Tsamosan" + }, + "cox": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Matsi-Nan" + }, + "coy": { + "level0": "Bookkeeping" + }, + "coz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan" + }, + "cpa": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group II" + }, + "cpb": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "cpc": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" 
+ }, + "cpg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Pontic-Cappadocian Greek" + }, + "cpi": { + "level0": "Pidgin", + "level1": "English-based pidgin" + }, + "cpn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang", + "level9": "Gua-Cherepon" + }, + "cpo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "cps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Capiznon-Ilonggo-Kawayan" + }, + "cpu": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "cpx": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "cpy": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Asheninka" + }, + "cqd": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First 
Vernacular Hmong", + "level8": "Far Western Miao" + }, + "cqu": { + "level0": "Bookkeeping" + }, + "cra": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo" + }, + "crb": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan", + "level3": "Ineric", + "level4": "Island Carib-Garifuna" + }, + "crc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "crd": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish" + }, + "crf": { + "level0": "Chocoan", + "level1": "Unclassified Chocoan" + }, + "crg": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree", + "level5": "Plains Creeic" + }, + "crh": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum", + "level7": "Crimeaic" + }, + "cri": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese", + "level16": "Saotomic" + }, + "crj": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crk": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": 
"Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree", + "level5": "Plains Creeic" + }, + "crl": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "crn": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol", + "level4": "Coran" + }, + "cro": { + "level0": "Siouan", + "level1": "Missouri River Siouan" + }, + "crq": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Chorote" + }, + "crr": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian" + }, + "crs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Isle-de-France Creole" + }, + "crt": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Chorote" + }, + "cru": { + "level0": "Bookkeeping" + }, + "crv": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Chowra-Teressa" + }, + "crw": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau" + }, + "crx": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central British Columbia Athabaskan", + "level4": "Carrieric", + 
"level5": "Dakelh" + }, + "cry": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "crz": { + "level0": "Chumashan", + "level1": "Southern Chumashan" + }, + "csa": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I", + "level7": "Tlacoatzintepec-Chiltepec" + }, + "csb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic" + }, + "csc": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "csd": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Old Chiangmai-Bangkok Sign" + }, + "cse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "csf": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "csg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "csh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Ashoic" + }, + "csi": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Western Miwokan" + }, + "csj": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "csk": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola" + }, 
+ "csl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "CSLic" + }, + "csm": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "csn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "cso": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "csp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua", + "level5": "Pinghua" + }, + "csq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Yugoslav Sign" + }, + "csr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "css": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan", + "level2": "Southern Costanoan" + }, + "cst": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan" + }, + "csv": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Ashoic" + }, + "csw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "csx": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "csy": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "csz": { + "level0": "Coosan" + }, + "cta": { + "level0": "Otomanguean", + "level1": "Eastern 
Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino" + }, + "ctc": { + "level0": "Bookkeeping" + }, + "ctd": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "cte": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group IV" + }, + "ctg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "cth": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "cti": { + "level0": "Bookkeeping" + }, + "ctl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I", + "level7": "Tlacoatzintepec-Chiltepec" + }, + "ctn": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "cto": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato" + }, + "ctp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cts": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bikol" + }, + "ctt": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "ctu": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan", + "level5": "Chol-Chontal" + }, + "cty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "ctz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cua": { + "level0": "Austroasiatic", + "level1": "Bahnaric" + }, + "cub": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Cubeo-Desano" + }, + "cuc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Northwestern Chinantec", + "level6": "Chinantec Group I" + }, + "cuh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga" + }, + "cui": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo" + }, + "cuj": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": 
"Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic", + "level5": "Western Yineic" + }, + "cuk": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Kuna" + }, + "cul": { + "level0": "Arawan", + "level1": "Madi-Madiha", + "level2": "Madiha" + }, + "cum": { + "level0": "Bookkeeping" + }, + "cun": { + "level0": "Bookkeeping" + }, + "cuo": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Cumana" + }, + "cup": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan", + "level4": "Cahuilla-Cupeno" + }, + "cuq": { + "level0": "Tai-Kadai", + "level1": "Hlaic", + "level2": "Nuclear Hlaic" + }, + "cur": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "cut": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Cuicatec" + }, + "cuu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Wuding-Yuanyang Tai" + }, + "cuv": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Matakam", + "level5": "Mefele-Cuvok" + }, + "cux": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Cuicatec" + }, + "cvg": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western 
Kho-Bwa", + "level3": "Chug-Lish" + }, + "cvn": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Chinantecan", + "level4": "Central-Eastern Chinantec", + "level5": "Southeastern Chinantec", + "level6": "Chinantec Group III" + }, + "cwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Unclassified North Mara" + }, + "cwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Chuwabo-Maindo" + }, + "cwd": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi", + "level4": "Cree" + }, + "cwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu" + }, + "cwg": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian" + }, + "cwt": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp" + }, + "cxh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West 
Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "cya": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino", + "level6": "Coastal Chatino", + "level7": "Eastern Chatino" + }, + "cym": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Old-Modern Welsh" + }, + "cyo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kuyan" + }, + "czh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Wu-Hui Chinese" + }, + "czn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Chatino", + "level5": "Core Chatino" + }, + "czo": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Inland Min" + }, + "czt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic" + }, + "daa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "dac": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", 
+ "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Dambi-Kumaru" + }, + "dad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "dae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "daf": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Dan-Toura" + }, + "dag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Mampruli-Dagbani" + }, + "dah": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup" + }, + "dai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day" + }, + "daj": { + "level0": "Dajuic", + "level1": "Western Dajuic", + "level2": "Nyala Dajuic" + }, + "dak": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Sioux" + }, + "dal": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic" + }, + "dam": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Clela-Damakawa" + }, + "dan": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "South Scandinavian" + }, + "dao": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "dap": { + "level0": "Bookkeeping" + }, + "daq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi", + "level5": "South Bastar Gondi-Koya" + }, + "dar": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic" + }, + "das": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "dat": { + "level0": "Bookkeeping" + }, + "dau": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "dav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Taita-Sagalla" + }, + "daw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan" + }, + "dax": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu" + }, + "daz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Auye-Dao" + }, + "dbb": { + 
"level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Kubi-Deno" + }, + "dbd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "dbe": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "dbf": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "Central Tariku" + }, + "dbg": { + "level0": "Dogon", + "level1": "North Plateau Dogon" + }, + "dbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "dbj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Northeast Sabahan" + }, + "dbl": { + "level0": "Pama-Nyungan" + }, + "dbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "dbn": { + "level0": "Inanwatan" + }, + "dbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "dbp": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1" + }, + "dbq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Daba-Mazagway-Kola" + }, + "dbr": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Dabarre-Tunni" + }, + "dbt": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "dbu": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul" + }, + "dbv": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "dbw": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "dcr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "ddd": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono", + "level6": "Dongotonic" + }, + "dde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "ddg": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor", + "level2": "Fataluku-Oirata" + }, + "ddi": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "ddj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "ddn": { + "level0": "Songhay", + "level1": "Eastern Songhay", + "level2": "Zarma-Kaado-Dendi" + }, + "ddo": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "ddr": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria", + "level4": "Dhudhuroa-Pallanganmiddang" + }, + "dds": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "ddw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "dec": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Buram Hill Chain" + }, + "ded": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu" + }, + "dee": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + }, + "def": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic" + }, + "deg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid", + "level6": "Degema-Engenni" + }, + "deh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "dei": { + "level0": "Geelvink Bay" + }, + "dek": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "dep": { + "level0": "Pidgin", + "level1": "Delaware-based pidgin" + }, + "deq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "der": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo" + }, + "des": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Cubeo-Desano", + "level4": "Yupua-Siriano-Desano", + "level5": "Siriano-Desano" + }, + "deu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Global German" + }, + "dev": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Bwana-Moam-Tapen" + }, + "dez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire 
River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye" + }, + "dga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric" + }, + "dgb": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "dgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon" + }, + "dgd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric", + "level16": "South Dagaric" + }, + "dge": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Degenanic" + }, + "dgg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux 
linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Are-Doga" + }, + "dgh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic" + }, + "dgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "North-West Dagaric" + }, + "dgk": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "dgn": { + "level0": "Yangmanic" + }, + "dgo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Kangri-Dogri" + }, + "dgr": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic" + }, + "dgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Dogoso-Khe" + }, + "dgu": { + "level0": "Bookkeeping" + }, + "dgx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + 
"level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Baruga-Doghoro" + }, + "dgz": { + "level0": "Dagan", + "level1": "Central Dagan", + "level2": "Southwest Dagan" + }, + "dha": { + "level0": "Bookkeeping" + }, + "dhd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Eastern Rajasthani" + }, + "dhg": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu" + }, + "dhi": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "dhl": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara", + "level4": "Thalanyji-Burduna" + }, + "dhm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Herero (R.30)" + }, + "dhn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Khandesic" + }, + "dho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "dhr": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + 
"level2": "Pilbara", + "level3": "Mantharta", + "level4": "Wariyangga-Dhargari" + }, + "dhs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Kamba-Dhaisu" + }, + "dhu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Yuin", + "level5": "Northern Costal Yuin" + }, + "dhv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "dhw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dia": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Galu-Alu" + }, + "dib": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dic": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida", + "level4": "Guebie-Lakota Dida" + }, + "did": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle", + "level4": "Didinga-Longarim" + }, + "dif": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric", + "level5": "Dieric" + }, + "dig": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Southern Mijikenda" + }, + "dih": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Delta-Californian Yuman", + "level4": "Diegueno" + }, + "dii": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)" + }, + "dij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "North Babaric" + }, + "dik": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dil": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "dim": { + "level0": "South Omotic" + }, + "dio": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa" + }, + "dip": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "diq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Zaza" + }, + "dir": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare", + "level7": "Dirim-Nnakenyare" + }, + "dis": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok" + }, + "diu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Kwangali-Diriku" + }, + "div": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala" + }, + "diw": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "diy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "diz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic" + }, + "djb": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Western-Inland Yolngu" + }, + "djc": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "djd": { + "level0": 
"Mirndi", + "level1": "Yirram" + }, + "dje": { + "level0": "Songhay", + "level1": "Eastern Songhay", + "level2": "Zarma-Kaado-Dendi" + }, + "djf": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "dji": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Western-Inland Yolngu" + }, + "djj": { + "level0": "Maningrida", + "level1": "Nakkara-Ndjebbana" + }, + "djk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons", + "level14": "Ndyuka" + }, + "djm": { + "level0": "Dogon", + "level1": "Plains Dogon" + }, + "djn": { + "level0": "Gunwinyguan", + "level1": "Western Gunwinyguan" + }, + "djo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "djr": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Eastern Dhuwal-Dhuwala" + }, + "dju": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill", + "level4": "Kapriman-Watakataui" + }, + "djw": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Bardic" + }, + "dka": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Dakpa-Dzala" + }, + "dkg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": 
"Bijimic-Sur-Shall", + "level6": "Kwangic", + "level7": "Vaghat" + }, + "dkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu" + }, + "dkl": { + "level0": "Bookkeeping" + }, + "dkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "dks": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Dinka" + }, + "dkx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Daba-Mazagway-Kola" + }, + "dlg": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Sakha-Dolgan" + }, + "dlk": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "Tigre-Dahalik" + }, + "dlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Dalmatian Romance" + }, + "dln": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic" + }, + "dma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": 
"Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi B" + }, + "dmb": { + "level0": "Dogon", + "level1": "West Dogon", + "level2": "Penangic" + }, + "dmc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert" + }, + "dme": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri", + "level7": "Dugwor-Merey" + }, + "dmf": { + "level0": "Speech Register", + "level1": "Atlantic-Congo Speech Register" + }, + "dmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "dmk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone" + }, + "dml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "dmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Dama-Galke" + }, + "dmo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": 
"Bebe-Kemezung", + "level8": "Naki-Kemezung" + }, + "dmr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku" + }, + "dms": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "dmu": { + "level0": "Pauwasi", + "level1": "Western Pauwasi" + }, + "dmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk", + "level8": "Dumpas-Sukang" + }, + "dmw": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin" + }, + "dmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Unclassified Shona (S. 
10)" + }, + "dmy": { + "level0": "Sentanic" + }, + "dna": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Walakic" + }, + "dnd": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Simog-Daonda" + }, + "dne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic", + "level10": "Ndendeule-Ngindo" + }, + "dng": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic", + "level6": "Zhongyuan" + }, + "dni": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Southeast Grand Valley Dani" + }, + "dnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote", + "level5": "Dengka-Meto" + }, + "dnn": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "dno": { + "level0": "Central Sudanic", + "level1": "Lenduic", + "level2": "Bale" + }, + "dnr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka", + "level4": "Urigina-Danaru" + }, + "dnt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani" + }, + "dnu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic" + }, + "dnw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central 
Dani" + }, + "dny": { + "level0": "Arawan", + "level1": "Madi-Madiha", + "level2": "Madiha" + }, + "doa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Kuman-Dom-Gunaa" + }, + "dob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "doc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic", + "level5": "Northern Kam" + }, + "doe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu" + }, + "dof": { + "level0": "Mailuan" + }, + "doh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Tiba-Dong" + }, + "dok": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "dol": { + "level0": "Doso-Turumsa" + }, + "don": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "doo": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + "level6": "Ndunga-Mba-Dongo" + }, + "dop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "doq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "dor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "dos": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose", + "level7": "Dogose-Khisa" + }, + "dot": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi" + }, + "dov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe" + }, + "dow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru" + }, + "dox": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + 
"level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid", + "level8": "Gidole-Bussa" + }, + "doy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Gonja-Dompo" + }, + "doz": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "dpp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic" + }, + "drb": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "drc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "drd": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi", + "level7": "Zhangzhungic" + }, + "dre": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Dolpo-Tichurong" + }, + "drg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk" + }, + "dri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Clela-Damakawa" + }, + "drl": { + "level0": "Pama-Nyungan", + "level1": "Yarli-Baagandji" + }, + "drn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar" + }, + "dro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau" + }, + "drq": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang" + }, + "drr": { + "level0": "Bookkeeping" + }, + "drs": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Sidaama-Gedeo" + }, + "dru": { + "level0": "Austronesian" + }, + "dry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dsb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Sorbian" + }, + "dse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign" + }, + "dsh": { + "level0": "Afro-Asiatic", + "level1": 
"Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "dsi": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Koulfaic" + }, + "dsk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic" + }, + "dsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Danish Sign" + }, + "dsn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic" + }, + "dso": { + "level0": "Bookkeeping" + }, + "dsq": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "dsz": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "dta": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "dtb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Rungus-Mangkaak-Labuk", + "level8": "Dumpas-Sukang" + }, + "dtd": { + "level0": "Wakashan", + "level1": "Southern Wakashan", + "level2": "Makah-Nitinat" + }, + "dti": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul", + "level3": "Yanda-Ana" + }, + "dtk": { + "level0": "Dogon", + "level1": "Plains Dogon", + "level2": "Western Plains Dogon" 
+ }, + "dtm": { + "level0": "Dogon", + "level1": "Plains Dogon", + "level2": "Western Plains Dogon" + }, + "dtn": { + "level0": "Gumuz" + }, + "dto": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "dtp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok" + }, + "dtr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Bisaya-Lotud" + }, + "dts": { + "level0": "Dogon", + "level1": "Escarpment Dogon" + }, + "dtt": { + "level0": "Dogon", + "level1": "Plains Dogon" + }, + "dtu": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul" + }, + "dty": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "dua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Duala-Malimba" + }, + "dub": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "dud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic" + }, + "due": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine" + }, + "duf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Extreme Southern New Caledonian" + }, + "dug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "duh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "dui": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon" + }, + "duj": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Western Dhuwal-Dhuwala" + }, + "duk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "dul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Alabat-Manide Agta" + }, + "dum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + 
"level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch" + }, + "dun": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage" + }, + "duo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon" + }, + "dup": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "duq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage" + }, + "dur": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "dus": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Upper Dudhkosi" + }, + "duu": { + "level0": "Sino-Tibetan", + "level1": "Nungish", + "level2": "Gunong" + }, + "duv": { + "level0": "Lakes Plain", + "level1": "Tariku" + }, + "duw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito" + }, + "dux": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku", + "level6": "Duun" + }, + "duy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Unclassified Northern Luzon" + }, + "duz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Unclassified Central 
Adamawa" + }, + "dva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "dwa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2" + }, + "dwk": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "dwl": { + "level0": "Bookkeeping" + }, + "dwr": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "dws": { + "level0": "Artificial Language" + }, + "dww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Kakabai linkage" + }, + "dwz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari", + "level10": "Kuswaric" + }, + "dya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Lobiri-Jaane" + }, + "dyb": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Nyulnyulic" + }, + "dyd": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Yawuric" + }, + "dyg": { + "level0": "Unattested", 
+ "level1": "Austronesian (Unattested)" + }, + "dyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo", + "level5": "Tagbana-Jimini" + }, + "dyk": { + "level0": "Bookkeeping" + }, + "dym": { + "level0": "Dogon", + "level1": "North Plateau Dogon", + "level2": "Yanda-Bondum-Tebul", + "level3": "Yanda-Ana" + }, + "dyn": { + "level0": "Pama-Nyungan", + "level1": "Macleay-New England" + }, + "dyo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola" + }, + "dyr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "dyu": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "dyy": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yidinic" + }, + "dza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "dzd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Giiwo-Daza" + }, + "dze": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + 
"level3": "Mantharta", + "level4": "Djiwarli-Thiin" + }, + "dzg": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Tebu" + }, + "dzl": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Dakpa-Dzala" + }, + "dzn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "dzo": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "ebg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Ebughu-Oro" + }, + "ebo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Ngungwel-Eboo" + }, + "ebr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Potou" + }, + "ebu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu" + }, + "ecr": { + "level0": "Unclassifiable" + }, + "ecs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign" + }, + "ecy": { + "level0": "Unclassifiable" + }, + "eee": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Unclassified Northern Tai" + }, + "efa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross" + }, + "efe": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "efi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem", + "level9": "Efik-Ibibio" + }, + "ega": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo" + }, + "egl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Emiliano-Romagnolo" + }, + "ego": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Eggon-Ake" + }, + "egy": { + "level0": "Afro-Asiatic", + "level1": "Egyptian" + }, + "ehs": { + 
"level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ehu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse", + "level7": "Ukue-Ehueun" + }, + "eip": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "eit": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "eiv": { + "level0": "North Bougainville", + "level1": "Rotokas-Askopan" + }, + "eja": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Her-Ejamat" + }, + "eka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Nnam-Ekajuk" + }, + "eke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ekit-Etebi" + }, + "ekg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Mee-Wodani" + }, + "eki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Unclassified Efikic" + }, + "ekk": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "Central Finnic" + }, + "ekl": { + "level0": "Austroasiatic", + 
"level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "ekm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu", + "level13": "Elip-Gunu" + }, + "eko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Koti-Nathembo" + }, + "ekp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid" + }, + "ekr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Yatye-Akpa" + }, + "eky": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale", + "level4": "Kayah" + }, + "ele": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "elh": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "eli": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Nding-Tasomi" + }, + "elk": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Au-Olo-Elkei", + "level5": "Olo-Elkei" + }, + "ell": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + 
"level8": "Nuclear Modern Greek" + }, + "elm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "West Ogonoid" + }, + "elo": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Western Omo-Tana" + }, + "elp": { + "level0": "Bookkeeping" + }, + "elu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere", + "level9": "Kurti-Elu" + }, + "ema": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid", + "level7": "Emaic" + }, + "emb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Tamanic" + }, + "eme": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon", + "level7": "Zoe-Emerillon" + }, + "emg": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Mewahang" + }, + "emi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "St. 
Matthias" + }, + "emk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan" + }, + "emn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid B" + }, + "emo": { + "level0": "Bookkeeping" + }, + "emp": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "Atrato", + "level3": "Panama-Baudo-Atrato" + }, + "emq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Muya" + }, + "ems": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "emu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Muria" + }, + "emw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "emx": { + "level0": "Speech Register", + "level1": "Basque-Romani" + }, + "emy": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Cholan" + }, + "emz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "ena": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + 
"level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic" + }, + "enb": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Northern Kalenjin" + }, + "enc": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "end": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Ende-Lio" + }, + "enf": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Enets" + }, + "eng": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English" + }, + "enh": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Enets" + }, + "enl": { + "level0": "Lengua-Mascoy", + "level1": "Lengua" + }, + "enm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English" + }, + "enn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid", + "level6": "Degema-Engenni" + }, + "eno": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "enq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": 
"Engan" + }, + "enr": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "enu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "env": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "enw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Enwang-Uda" + }, + "enx": { + "level0": "Lengua-Mascoy", + "level1": "Lengua" + }, + "eot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Western Tano" + }, + "epi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Delta Edoid" + }, + "epo": { + "level0": "Artificial Language", + "level1": "Esperantic" + }, + "era": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Malasa-Eravallan" + }, + "erg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "erh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": 
"Edoid", + "level5": "Southwestern Edoid" + }, + "eri": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru", + "level4": "Erimaic" + }, + "erk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "South Efatic" + }, + "ero": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "err": { + "level0": "Giimbiyu", + "level1": "Urninganggic" + }, + "ers": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Ersuic" + }, + "ert": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Eritai-Obokuitai-Biritai" + }, + "erw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuta" + }, + "ese": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik" + }, + "esg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi", + "level7": "Eastern Gondi" + }, + "esh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "esi": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit", + "level3": "Alaskan Inupiaq" + }, + "esk": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": 
"Inuit", + "level3": "Alaskan Inupiaq" + }, + "esl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "esm": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "esn": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "eso": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic" + }, + "ess": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "est": { + "level0": "Uralic", + "level1": "Finnic" + }, + "esu": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "esy": { + "level0": "Artificial Language" + }, + "etb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ekit-Etebi" + }, + "eth": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "etn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "South Efatic" + }, + "eto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "etr": { + "level0": "Bosavi", + "level1": "Etoro-Bedamini" + }, + "ets": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel", + "level7": "Uneme-Yekhee" + }, + "etu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", 
+ "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham" + }, + "etx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic", + "level5": "Iten-Cara-Berom" + }, + "etz": { + "level0": "Mairasic" + }, + "eud": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Opata-Eudeve" + }, + "eur": { + "level0": "Bookkeeping" + }, + "eve": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic", + "level3": "Ewenic" + }, + "evh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "evn": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic" + }, + "ewe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Eweic" + }, + "ewo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)", + "level9": "Ewondo-Bebele" + }, + "ext": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "eya": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak" + }, + "eyo": { + "level0": "Nilotic", + "level1": 
"Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "eze": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele", + "level8": "Kukele-Uzekwe" + }, + "fab": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese", + "level15": "Bantu Layer Lower Guinea Portuguese" + }, + "fad": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "faf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "fag": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "fah": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Unclassified Benue-Congo" + }, + "fai": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + 
"level7": "Tifal-Telefol", + "level8": "Tifalic", + "level9": "Faiwol-Seltaman" + }, + "faj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram" + }, + "fak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "fal": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo", + "level3": "Adamawa Fali" + }, + "fam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Unclassified Bantoid" + }, + "fan": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Yaunde-Fang (A.70)" + }, + "fao": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian", + "level6": "Icelandic-Faroese" + }, + "fap": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Palor-Ndut" + }, + "far": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "fas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + 
"level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "fau": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku", + "level3": "Fayu-Kirikiri" + }, + "fax": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "fay": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "faz": { + "level0": "Bookkeeping" + }, + "fcs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "fer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Feroge-Mangaya" + }, + "ffi": { + "level0": "Bookkeeping" + }, + "ffm": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fia": { + "level0": "Nubian", + "level1": "Nile Nubian", + "level2": "Nobiin Nubian" + }, + "fie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Fyer-Tambas" + }, + "fif": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic", + "level5": "Modern Sayhadic" + }, + "fij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian", + "level8": "Viwa-Lomaiviti-East Viti Levu" + }, + "fil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Tagalogic", + "level5": "Tagalog-Filipino" + }, + "fin": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fip": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic" + }, + "fir": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "fit": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fiw": { + "level0": "East Kutubu" + }, + "fiz": { + "level0": "Bookkeeping" + }, + "fkk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana", + "level6": "Nkafa-Kirya" + }, + "fkv": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Nuclear Finnish" + }, + "fla": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic", + "level4": "Kalispel-Spokane" + }, + "flh": { + "level0": "Lakes Plain", + "level1": "East Lakes Plain" + }, + "fli": { + 
"level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan", + "level7": "Fali-Gude" + }, + "fll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo", + "level3": "Adamawa Fali" + }, + "flm": { + "level0": "Bookkeeping" + }, + "fln": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Flinders-Barrow" + }, + "flr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric", + "level13": "Fuliiru-Vira" + }, + "fly": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "fmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "fmu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi", + "level7": "Eastern Gondi" + }, + "fnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym" + }, + "fng": { + "level0": "Pidgin", + "level1": "Zulu-based pidgin" + }, + "fni": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic", + "level7": "Zan-Kulaalic", + "level8": "Kulaalic" + }, + "fod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang", + "level10": "Gikyode-Ginyanga", + "level11": "Gikyode-Foodo" + }, + "foi": { + "level0": "East Kutubu" + }, + "fom": { + "level0": "Bookkeeping" + }, + "fon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "for": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Fore-Gimi" + }, + "fos": { + "level0": "Austronesian", + "level1": "East Formosan" + }, + "fpe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English" + }, + "fqs": { + "level0": "Baibai-Fas" + }, + "fra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": 
"Central Oil", + "level14": "Macro-French", + "level15": "Global French" + }, + "frc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil" + }, + "frd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Kei-Fordata" + }, + "fri": { + "level0": "Bookkeeping" + }, + "fro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil" + }, + "frp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Francoprovencalic" + }, + "frq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup" + }, + "frr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": 
"Anglo-Frisian", + "level7": "Frisian" + }, + "frs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German", + "level10": "North Low Saxon" + }, + "frt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "fry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian", + "level8": "Modern West Frisian", + "level9": "Westlauwers-Terschelling Frisian" + }, + "fse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign", + "level3": "Finnish Sign" + }, + "fsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "fss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign", + "level3": "Finnish Sign" + }, + "fub": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula", + "level5": "Adamawa-Bagirmi Fulfulde" + }, + "fuc": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "fue": { + "level0": 
"Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fuf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fuh": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula" + }, + "fui": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula", + "level5": "Adamawa-Bagirmi Fulfulde" + }, + "fuj": { + "level0": "Heibanic", + "level1": "Eastern Heibanic" + }, + "fum": { + "level0": "Bookkeeping" + }, + "fuq": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula" + }, + "fur": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "fut": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers", + "level9": "Mele-Futuna" + }, + "fuu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita" + }, + "fuv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Fula-Sereer", + "level3": "Fula", + "level4": "Eastern Fula" + }, + "fvr": { + "level0": "Furan" + }, + "fwa": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa", + "level11": "Hyenghene" + }, + "fwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Zambezi Hook" + }, + "gaa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ga-Dangme" + }, + "gab": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2", + "level6": "Gabri-Kimre" + }, + "gad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic", + "level6": "Cagayan-Baliwon Gaddang" + }, + "gae": { + "level0": "Arawakan", + "level1": "Alto Orinoco", + "level2": "Parenic" + }, + "gaf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka" + }, + "gag": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "gah": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "gai": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien" + }, + "gaj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": 
"Gadsup-Agarabi" + }, + "gak": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan" + }, + "gal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro" + }, + "gam": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi", + "level3": "Kandawo-Narak" + }, + "gan": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "gao": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram" + }, + "gap": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "gaq": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "gar": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Boselewa-Galeya" + }, + "gas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Garasia Bhil" + }, + "gat": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Unclassified Kainantu-Goroka" + }, + "gau": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba", + "level3": "Ollari-Gadaba" + }, + "gav": { + "level0": "Bookkeeping" + }, 
+ "gaw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Garuh-Foran" + }, + "gax": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "Central Oromo" + }, + "gay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "gaz": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo" + }, + "gbb": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Arandic" + }, + "gbd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "gbe": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "gbf": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Burui-Gaikundi" + }, + "gbg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka" + }, + "gbh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "gbi": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Galela-Loloda" + }, + "gbj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + 
"level2": "Gutob-Remo" + }, + "gbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "gbl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "gbm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Central Pahari" + }, + "gbn": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "gbo": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo" + }, + "gbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya", + "level7": "Gbeya-Suma" + }, + "gbq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya" + }, + "gbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Gbagyi-Gbari" + }, + "gbs": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "gbv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka" + }, + "gbw": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Eastern Waka-Kabic" + }, + "gbx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Eastern Phla-Phera" + }, + "gby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Gbagyi-Gbari" + }, + "gbz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Yazdi-Kermani-Nayini" + }, + "gcc": { + "level0": "Baining" + }, + "gcd": { + "level0": "Tangkic", + "level1": "Southern Tangkic" + }, + "gce": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "gcf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", 
+ "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Lesser Antillean French Creole" + }, + "gcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole", + "level15": "Grenada-Tobago Creole" + }, + "gcn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Gaena-Korafe" + }, + "gcr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Guyanic Creole French" + }, + "gct": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "North Alemannic" + }, + "gda": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + 
"level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "gdb": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba", + "level3": "Ollari-Gadaba" + }, + "gdc": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Northern Maric", + "level5": "Warungu-Gugu Badhun" + }, + "gdd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "gde": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan", + "level7": "Fali-Gude" + }, + "gdf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic", + "level7": "Gudufic" + }, + "gdg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic", + "level6": "Cagayan-Baliwon Gaddang" + }, + "gdh": { + "level0": "Jarrakan", + "level1": "Miriwunic" + }, + "gdi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi" + }, + "gdj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + 
"level3": "Kuthant-Gurdjar", + "level4": "Rib-Gurdjar" + }, + "gdk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1" + }, + "gdl": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid", + "level8": "Gidole-Bussa" + }, + "gdn": { + "level0": "Dagan" + }, + "gdo": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Botlikh-Godoberi" + }, + "gdq": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "gdr": { + "level0": "Eastern Trans-Fly" + }, + "gds": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "gdu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic" + }, + "gdx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "gea": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Gera-Geruma" + }, + "geb": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni" + }, + "gec": { + "level0": "Kru", + "level1": "Greater Western Kru", + 
"level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo", + "level8": "Gboloo-Central Grebo" + }, + "ged": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "geg": { + "level0": "Bookkeeping" + }, + "geh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, + "gei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera" + }, + "gej": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe" + }, + "gek": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "gel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Northwestern Kainji", + "level6": "Dukaic", + "level7": "Main-Gwamhi" + }, + "gen": { + "level0": "Bookkeeping" + }, + "geq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "ges": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": 
"Geser-Gorom-Bati" + }, + "gev": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "gew": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Gera-Geruma" + }, + "gex": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Karre-Boni" + }, + "gey": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Mitukuic" + }, + "gez": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic" + }, + "gfk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai" + }, + "gft": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "N-Group" + }, + "gga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "East Santa Isabel" + }, + "ggb": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Bassaic" + }, + "ggd": { + "level0": "Pama-Nyungan", + "level1": "Paman" + }, + "gge": { + "level0": "Maningrida", + "level1": "Bureran" + }, + "ggg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Unclassified Western Hindi", + "level11": "Ghera-Gurgula" + }, + "ggh": { + "level0": "Bookkeeping" + }, + "ggl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon", + "level5": "Ganglau-Saep" + }, + "ggm": { + "level0": "Bookkeeping" + }, + "ggr": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Thaypanic" + }, + "ggt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero", + "level9": "Tuam" + }, + "ggu": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban" + }, + "ggw": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic" + }, + "gha": { + "level0": "Afro-Asiatic", + 
"level1": "Berber" + }, + "ghe": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale", + "level5": "Nuclear Ghale" + }, + "ghh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale", + "level5": "Nuclear Ghale" + }, + "ghk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "ghl": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "ghn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic", + "level12": "Ghanongga-Lungga" + }, + "gho": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber", + "level4": "Northwestern Moroccan Berber" + }, + "ghr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Unclassified Western Hindi", + "level11": "Ghera-Gurgula" + }, + "ghs": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean" + }, + "ght": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Ghale" + }, + "gia": { + "level0": "Jarrakan" + }, + "gib": { + "level0": "Pidgin", + "level1": 
"Hausa-based pidgin" + }, + "gic": { + "level0": "Unclassifiable" + }, + "gid": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara" + }, + "gie": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida", + "level4": "Guebie-Lakota Dida" + }, + "gig": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "gih": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Inland Bandjalang" + }, + "gii": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "gil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian" + }, + "gim": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Fore-Gimi" + }, + "gin": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "gio": { + "level0": "Bookkeeping" + }, + "gip": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + 
"level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "giq": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Southwestern Gelao" + }, + "gir": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao", + "level7": "Ahouic" + }, + "gis": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua", + "level5": "Giziga" + }, + "git": { + "level0": "Tsimshian", + "level1": "Nishga-Gitxsan" + }, + "giu": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Northern Gelao" + }, + "giw": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic", + "level6": "Southwestern Gelao" + }, + "gix": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Bwaka" + }, + "giy": { + "level0": "Unattested" + }, + "giz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Maroua", + "level5": "Giziga" + }, + "gjk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": 
"Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "gjm": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Warrnambool-Bunganditj" + }, + "gjn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Gonja-Dompo" + }, + "gjr": { + "level0": "Mixed Language", + "level1": "Gurindji-Kriol" + }, + "gju": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "gka": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Unclassified Nuclear Warup" + }, + "gkd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram", + "level6": "Aisian" + }, + "gke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Dama-Galke" + }, + "gkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid" + }, + "gko": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama" + }, + "gkp": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + 
"level3": "Southwest Mande", + "level4": "Kpelle" + }, + "gku": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Ghaap-Kalahari" + }, + "gla": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic", + "level8": "Eastern Goidelic" + }, + "glb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "glc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic" + }, + "gld": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic" + }, + "gle": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic" + }, + "glg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance" + }, + "glh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Western Pashayi" + }, + "gli": { + "level0": "Bookkeeping" + }, + "glj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": 
"Goulaic", + "level7": "Zan-Kulaalic", + "level8": "Kulaalic" + }, + "glk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Gilaki-Rudbari" + }, + "gll": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic", + "level3": "Eastern Karnic" + }, + "glo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele" + }, + "glr": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "glu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Bayo-Morom" + }, + "glv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic", + "level7": "Modern Goidelic", + "level8": "Eastern Goidelic" + }, + "glw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Wandala-Malgwa-Glavda" + }, + "gma": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "gmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + 
"level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "gmd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Mak-Tal" + }, + "gmg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Unclassified Sogeram" + }, + "gmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German" + }, + "gml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German" + }, + "gmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Mbodomo-Bofi" + }, + "gmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Koma Alantika" + }, + "gmo": { + "level0": "Bookkeeping" + }, + "gmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "gmv": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "gmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kinga-Magoma" + }, + "gmy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek" + }, + "gna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose" + }, + "gnb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic", + "level6": "Gangte-Vaiphei" + }, + "gnc": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Unclassified Berber" + }, + "gnd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri" + }, + "gne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "gng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Konkomba-Gangam" + }, + "gnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + 
"level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos" + }, + "gni": { + "level0": "Bunaban" + }, + "gnj": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban", + "level5": "Bengic" + }, + "gnk": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana", + "level5": "Ana" + }, + "gnl": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Eastern Maric" + }, + "gnm": { + "level0": "Dagan", + "level1": "Southeast Dagan" + }, + "gnn": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Western Dhuwal-Dhuwala" + }, + "gno": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi" + }, + "gnq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic" + }, + "gnr": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Eastern Waka-Kabic" + }, + "gnt": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "gnu": { + "level0": "Nuclear Torricelli", + "level1": "Unclassified Nuclear Torricelli" + }, + "gnw": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.B" + }, + "gnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "Baka-Gundi", + "level8": "Baka complex" + }, + "goa": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Guro-Yaoure" + }, + "gob": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo", + "level3": "Guahibo-Playero" + }, + "goc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng" + }, + "god": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Western Bete" + }, + "goe": { + "level0": "Sino-Tibetan" + }, + "gof": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo", + "level4": "Dawro-Gofa-Gamo" + }, + "gog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu" + }, + "goh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German" + }, + "goi": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "goj": { + "level0": "Bookkeeping" + }, + "gok": { + "level0": "Bookkeeping" + }, + "gol": { + "level0": "Atlantic-Congo" + }, + "gom": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani" + }, + "goo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian" + }, + "gop": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay" + }, + "goq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "gor": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "gos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German", + "level10": "North Low Saxon" + }, + "got": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "East Germanic" + }, + "gou": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic", + "level5": "Buwal-Gavar" + }, + "gov": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + 
"level5": "Dan-Toura", + "level6": "Toura-Goo" + }, + "gow": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic", + "level6": "Iraqwoid" + }, + "gox": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "goy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "goz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic" + }, + "gpa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa", + "level8": "Kami-Gupa" + }, + "gpe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio" + }, + "gqa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "gqi": { + 
"level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic" + }, + "gqr": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Bediondo" + }, + "gqu": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Gauic", + "level5": "Gelaoic" + }, + "gra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Garasia Bhil" + }, + "grc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek" + }, + "grd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic" + }, + "grg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "grh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "gri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "grj": { + "level0": "Kru", + "level1": "Greater 
Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo" + }, + "grm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "gro": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "grq": { + "level0": "Ramu", + "level1": "Agoan" + }, + "grr": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "grs": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik", + "level3": "Gresi-Kemtuik" + }, + "grt": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo" + }, + "gru": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "N-Group" + }, + "grv": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo", + "level8": "Gboloo-Central Grebo" + }, + "grw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + 
"level11": "Eastern Taupota" + }, + "gry": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Liberian Grebo", + "level6": "North-Central Liberian Grebo", + "level7": "Barclayville-Gboloo-Central Liberian Grebo" + }, + "grz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "gsc": { + "level0": "Bookkeeping" + }, + "gse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "gsg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "gsl": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "Gusilay-Bandial" + }, + "gsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "gsn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "gso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Gbaya Meridional" + }, + "gsp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon" + }, + "gss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "gsw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": 
"Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "South Alemannic" + }, + "gti": { + "level0": "Bookkeeping" + }, + "gtu": { + "level0": "Bookkeeping" + }, + "gua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "guc": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Guajiro-Paraujano" + }, + "gud": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida", + "level3": "Dida" + }, + "gue": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin", + "level5": "Ngumpit" + }, + "guf": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu", + "level3": "Southern-Eastern Yolngu", + "level4": "Dhuwal-Dhuwala", + "level5": "Eastern Dhuwal-Dhuwala" + }, + "gug": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani" + }, + "guh": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan", + "level2": "Central Guahibo", + "level3": "Guahibo-Playero" + }, + "gui": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.B", + "level8": "Chiriguanic" + }, + "guj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "guk": { + "level0": "Gumuz", + "level1": "Nuclear Gumuz" + }, + "gul": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah" + }, + "gum": { + "level0": "Barbacoan", + "level1": "Coconucan" + }, + "gun": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A" + }, + "guo": { + "level0": "Guahiboan" + }, + "gup": { + "level0": "Gunwinyguan" + }, + "guq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I" + }, + "gur": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Farefareic" + }, + 
"gus": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "gut": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Votic Chibchan" + }, + "guu": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame", + "level3": "Yanomam-Yanimamo" + }, + "guv": { + "level0": "Bookkeeping" + }, + "guw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "gux": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba" + }, + "guz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara" + }, + "gva": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "gvc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo" + }, + "gve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "gvf": { + "level0": "Nuclear Trans New 
Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "gvj": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava", + "level7": "Guaja-Aure-Aura" + }, + "gvl": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "gvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "gvn": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "gvo": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic", + "level3": "Nuclear Gavianic" + }, + "gvp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira", + "level6": "Southeastern Timbira" + }, + "gvr": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic" + }, + "gvs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage" + }, + "gvy": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Northern Thura-Yura" + }, + "gwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + 
"level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Potou" + }, + "gwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Unclassified Nigerian Jarawan" + }, + "gwc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Dir-Swat Kohistani" + }, + "gwd": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Transversal Lowland East Cushitic", + "level6": "Dullay" + }, + "gwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu" + }, + "gwf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani" + }, + "gwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "gwi": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Gwichin-Han" + }, + "gwj": { + "level0": "Khoe-Kwadi", + "level1": 
"Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana", + "level5": "Ana" + }, + "gwn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.1" + }, + "gwr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "gwt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic" + }, + "gwu": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "gww": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "gwx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang", + "level9": "Gua-Cherepon" + }, + "gxx": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn", + "level6": "Guere" + }, + "gya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental" + }, + "gyb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "gyd": { + "level0": "Tangkic", + "level1": "Southern Tangkic", + "level2": "Kayardild-Yangkaal" + }, + "gye": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Lameic" + }, + "gyf": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "gyg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic" + }, + "gyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Mvumboic", + "level11": "Kwasio-Gyele" + }, + "gyl": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Aari-Gayil" + }, + "gym": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Guaymiic" + }, + "gyn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "gyr": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II" + }, 
+ "gyy": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Margany-Gunya" + }, + "gyz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Gejic" + }, + "gza": { + "level0": "Blue Nile Mao", + "level1": "West Mao" + }, + "gzi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic" + }, + "gzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "East Makian-Gane" + }, + "haa": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Gwichin-Han" + }, + "hab": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hac": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani" + }, + "had": { + "level0": "Hatam-Mansim" + }, + "hae": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": 
"Central-Eastern Oromo", + "level9": "South-East-North Oromo" + }, + "haf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Kamara-Hanga" + }, + "hah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "haj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Eastern Bengali" + }, + "hak": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "hal": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Jeh-Halang" + }, + "ham": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "han": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic", + "level13": "Hangaza-Shubi" + }, + "hao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Haliaic" + }, + "hap": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Southeast Grand Valley Dani" + }, + "haq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic" + }, + "har": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage" + }, + "has": { + "level0": "Wakashan", + "level1": "Northern Wakashan" + }, + "hat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + 
"level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French" + }, + "hau": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.1" + }, + "hav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Hunde-Havu" + }, + "haw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal" + }, + "hax": { + "level0": "Haida" + }, + "hay": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "South Rutara" + }, + "haz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + 
"level8": "Farsic", + "level9": "Eastern Farsic" + }, + "hba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "hbb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic", + "level7": "Kilba-South Margi" + }, + "hbn": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol", + "level4": "Ebang-Laru" + }, + "hbo": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Hebrewic" + }, + "hbs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "hbu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A" + }, + "hca": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hch": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Corachol" + }, + "hdn": { + "level0": "Haida" + }, + "hds": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + 
"level2": "LSFic", + "level3": "ASLic", + "level4": "Honduras-Panama Sign" + }, + "hdy": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Hadiyyaic" + }, + "hea": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "Northeastern Qiandongic Miao" + }, + "heb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Hebrewic" + }, + "hed": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", + "level4": "Peveic", + "level5": "Hede-Ngide" + }, + "heg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar" + }, + "heh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Bena-Hehe" + }, + "hei": { + "level0": "Wakashan", + "level1": "Northern Wakashan", + "level2": "Kwakiutlan" + }, + "hem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "her": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + 
"level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Herero (R.30)" + }, + "hgm": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "North Khoekhoe" + }, + "hgw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota" + }, + "hhi": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua", + "level3": "Hoyaic" + }, + "hhr": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Her-Ejamat" + }, + "hhy": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua", + "level3": "Hoyaic" + }, + "hia": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "hib": { + "level0": "Hibito-Cholon" + }, + "hid": { + "level0": "Siouan", + "level1": "Missouri River Siouan" + }, + "hif": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hig": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic", + "level5": "Nkafa-Kirya-Bana", + "level6": 
"Nkafa-Kirya" + }, + "hih": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "hii": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "hij": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Basaa-Yaunde (A40-70)", + "level8": "Basaa (A.40)", + "level9": "Basaa-Bakoko", + "level10": "Basaa-Hijuk" + }, + "hik": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Central Ambon" + }, + "hil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Capiznon-Ilonggo-Kawayan" + }, + "hin": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "hio": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe", + "level4": "Tshwa Khoe" + }, + "hir": { + "level0": "Unattested", + "level1": "Arawan 
(Unattested)" + }, + "hit": { + "level0": "Indo-European", + "level1": "Anatolian" + }, + "hiw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Hiw-Lo-Toga" + }, + "hix": { + "level0": "Cariban", + "level1": "Parukotoan", + "level2": "Waiwaian" + }, + "hji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "hka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "hke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Hunde-Havu" + }, + "hkh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Kashmiric" + }, + "hkk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "hkn": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "hks": { + "level0": "Sign 
Language", + "level1": "L1 Sign Language", + "level2": "CSLic" + }, + "hla": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Haliaic" + }, + "hlb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "hld": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Unclassified Kayong-Jeh-Halang" + }, + "hle": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo" + }, + "hlt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic" + }, + "hlu": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Luvian" + }, + "hma": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmb": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "hmc": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + 
"level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmd": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hme": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmf": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao", + "level9": "Unclassified First Vernacular Hmong" + }, + "hmg": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "hmh": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmi": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Huishui" + }, + "hmj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hml": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian" + }, + "hmm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", 
+ "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmo": { + "level0": "Pidgin", + "level1": "Motu-based pidgin" + }, + "hmp": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmq": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "Northeastern Qiandongic Miao", + "level6": "Eastern Qiandongic Miao" + }, + "hmr": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric", + "level7": "Hmar-Saihriem" + }, + "hms": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "South Qiandongic Miao" + }, + "hmt": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Kapau-Menya" + }, + "hmu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "hmv": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao", + "level9": "Unclassified First Vernacular Hmong" + }, + "hmw": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Mashan" + }, + "hmy": { + "level0": "Hmong-Mien", 
+ "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "hmz": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong" + }, + "hna": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic" + }, + "hnd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Hindko" + }, + "hne": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi" + }, + "hng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo" + }, + "hnh": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": 
"Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Kxoe-Ani" + }, + "hni": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya" + }, + "hnj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao" + }, + "hnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan" + }, + "hno": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Hindko" + }, + "hns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Bhojpuric" + }, + "hnu": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Cuoi" + }, + "hoa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic", + "level12": "Hoava-Kusaghe" + }, + 
"hob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "hoc": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Ho-Mundari" + }, + "hod": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Nzanyic" + }, + "hoe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau", + "level5": "Horom-Fyem" + }, + "hoh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA" + }, + "hoi": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "hoj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Eastern Rajasthani" + }, + "hol": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)" + }, + "hom": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan" + }, + "hoo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega" + }, + "hop": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan" + }, + "hor": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "hos": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Vietnamese Sign" + }, + "hot": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Misim-Yamap" + }, + "hov": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner", + "level6": "Hovongan-Kereho" + }, + "how": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic" + }, + "hoy": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": 
"Badaga-Kannada", + "level5": "Kannadoid" + }, + "hoz": { + "level0": "Blue Nile Mao", + "level1": "West Mao", + "level2": "Hozo-Seze" + }, + "hpo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish" + }, + "hps": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "hra": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "hre": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "hrk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha" + }, + "hrm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong" + }, + "hro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic" + }, + "hrr": { + "level0": "Bookkeeping" + }, + "hrt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Bohtan" + }, + "hrv": { + "level0": "Indo-European", + "level1": "Balto-Slavic" + }, + "hrx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + 
"level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian" + }, + "hrz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Northern Tatic" + }, + "hsb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Sorbian" + }, + "hsf": { + "level0": "Bookkeeping" + }, + "hsh": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "hsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "hsn": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "hss": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Hobyot-Western MSA", + "level5": "Western MSA" + }, + "hti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "hto": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan", + "level2": "Minica-Murui" + }, + "htu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Northeast Ambon" + }, + "hub": { + "level0": "Chicham", + "level1": "Shuaric", + "level2": "Huambisa-Shuar" + }, + "huc": { + "level0": "Kxa" + }, + "hud": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Manusela-Huaulu" + }, + "hue": { + "level0": "Huavean", + "level1": "San Francisco-Santa Mar\u00eda Huave" + }, + "huf": { + "level0": "Kwalean", + "level1": "Humene-Kwale" + }, + "hug": { + "level0": "Harakmbut" + }, + "huh": { + "level0": "Araucanian" + }, + "hui": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli" + }, + "huj": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Guiyang" + }, + "huk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "hul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Sinagoro-Keapara", + "level9": "Hula-Keapara" + }, + "hum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Hungan-Samba" + }, + "hun": { + "level0": "Uralic", + "level1": "Hungaric" + }, + "huo": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "hup": { + "level0": "Athabaskan-Eyak-Tlingit", + 
"level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "huq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Northern Cham" + }, + "hur": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "hus": { + "level0": "Mayan", + "level1": "Huastecan Mayan" + }, + "hut": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "huu": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan", + "level2": "Minica-Murui" + }, + "huv": { + "level0": "Huavean", + "level1": "San Dionisio-San Mateo Huave" + }, + "huw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "hux": { + "level0": "Huitotoan", + "level1": "Nuclear Witotoan" + }, + "huy": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab" + }, + "huz": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "East Tsezic" + }, + "hva": { + "level0": "Bookkeeping" + }, + "hvc": { + "level0": "Unclassifiable" + }, + "hve": { + "level0": "Huavean", + "level1": "San Dionisio-San Mateo Huave" + }, + "hvk": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone", + "level11": "Bwatooic" + }, + "hvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Hawu-Dhao" + }, + "hvv": { + "level0": "Huavean", + "level1": "San Francisco-Santa Mar\u00eda Huave" + }, + "hwa": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Bakwe-Wane" + }, + "hwc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English" + }, + "hwo": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Eastern Tera" + }, + "hya": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic" + }, + "hye": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic", + "level3": "Eastern-Western Armenian" + }, + "hyw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic", + "level3": "Eastern-Western Armenian" + }, + "iai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "ian": { + "level0": "Ndu", 
+ "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "iap": { + "level0": "Bookkeeping" + }, + "iba": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang", + "level7": "Northern Iban" + }, + "ibb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Okop Usem", + "level9": "Efik-Ibibio" + }, + "ibd": { + "level0": "Iwaidjan Proper", + "level1": "Central Iwaidjic" + }, + "ibe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid" + }, + "ibg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic" + }, + "ibh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "ibi": { + "level0": "Bookkeeping" + }, + "ibl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "ibm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "ibn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibino-Iko" + }, + "ibo": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "ibr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari", + "level9": "Ibuoro-ItuMbuso" + }, + "ibu": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan" + }, + "iby": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "ica": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "ich": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Kpan-Icen" + }, + "icl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Danish Sign" + }, + "icr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Miskitoic Creole English" + }, + "ida": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia" + }, + "idb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Indo-Portuguesic", + "level15": "Northern Indo-Portuguesic" + }, + "idc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau", + "level6": "Kuturmi-Ajiya" + }, + "idd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "ide": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic", + "level8": "Unclassified Efikic" + }, + "idi": { + "level0": "Pahoturi" + }, + "ido": { + "level0": "Artificial Language", + "level1": "Esperantic" + }, + "idr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Indri-Togoyo" + }, + "ids": { + "level0": "Bookkeeping" + }, + "idt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": 
"Lakalei-Idate" + }, + "idu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma", + "level7": "Idoma-Agatu-Okpogu" + }, + "ifa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw" + }, + "ifb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw", + "level8": "Batad-Mayoyao" + }, + "ife": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "iff": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "ifk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw" + }, + "ifm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": 
"Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "ifu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Ifugaw", + "level8": "Batad-Mayoyao" + }, + "ify": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran", + "level8": "Kalanguya" + }, + "igb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid" + }, + "ige": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya" + }, + "igg": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Unclassified Tamolan" + }, + "igl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid" + }, + "igm": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan", + "level3": "Tangu-Igom" + }, + "ign": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca", + "level4": "Moje\u00f1o" + }, + "igo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "igs": { + "level0": "Artificial Language" + }, + "igw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Sasaru-Igwe" + }, + "ihb": { + "level0": "Pidgin", + "level1": "Iha-based 
pidgin" + }, + "ihi": { + "level0": "Bookkeeping" + }, + "ihp": { + "level0": "West Bomberai", + "level1": "Nuclear West Bomberai" + }, + "ihw": { + "level0": "Pama-Nyungan", + "level1": "Ganaic" + }, + "iii": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu" + }, + "iin": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Mantharta", + "level4": "Djiwarli-Thiin" + }, + "ijc": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo" + }, + "ije": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo", + "level4": "Biseni-Okordia" + }, + "ijj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede" + }, + "ijn": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "ijs": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo" + }, + "ike": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit" + }, + "ikh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Central Plains Edoid", + "level7": "Emaic" + }, + "iki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibino-Iko" + }, + "ikk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + 
"level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "ikl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "iko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross" + }, + "ikp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Ikpeshic" + }, + "ikr": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Central Alaya-Athima" + }, + "iks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ikt": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit" + }, + "ikv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau" + }, + "ikw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid" + }, + "ikx": { + "level0": "Kuliak" + }, + "ikz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Southwest Mara" + }, + "ila": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "North 
Lembata-Adonara" + }, + "ilb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "ile": { + "level0": "Artificial Language" + }, + "ilg": { + "level0": "Iwaidjan Proper", + "level1": "Central Iwaidjic" + }, + "ili": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "ilk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran" + }, + "ill": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "ilo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon" + }, + "ils": { + "level0": "Sign Language", + "level1": "Pidgin Sign Language" + }, + "ilu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar" + }, + "ilv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross" + }, + "ima": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "ime": { + "level0": "Bookkeeping" + }, + "imi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan", + "level6": "Anamuxric" + }, + "iml": { + "level0": "Coosan" + }, + "imn": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "imo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Aua-Gawil" + }, + "imr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "imy": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Milyan-Carian" + }, + "ina": { + "level0": "Artificial Language" + }, + "inb": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua" + }, + "ind": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Standard Malay-Indonesian" + }, + "ing": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "inh": { + "level0": "Nakh-Daghestanian", + "level1": "Nakh", + "level2": "Chechen-Ingush" + }, + "inl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign", + "level5": "Indonesian Sign" + }, + "inm": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "inn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": 
"Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran" + }, + "ino": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "inp": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus" + }, + "ins": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign", + "level3": "Indo-Pakistani Sign" + }, + "int": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic" + }, + "inz": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "ior": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group", + "level7": "Peripheral Western Gurage" + }, + "iou": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic" + }, + "iow": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Winnebago-Chiwere" + }, + "ipi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "ipo": { + "level0": "Anim", + "level1": "Inland Gulf of Papua" + }, + "iqu": { + "level0": "Zaparoan", + "level1": "Iquito-Arabela", + "level2": "Cahuarano-Iquito" + }, + "ire": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay", + "level6": "Yaur-Yerisiam" + }, + "irh": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu", + "level6": "Irarutic" + }, + "iri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric" + }, + "irk": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic", + "level6": "Iraqwoid" + }, + "irr": { + "level0": "Bookkeeping" + }, + "iru": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga" + }, + "irx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Sabakor" + }, + "iry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "isa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Unclassified Goroka" + }, + "isc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "isd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley" + }, + "ise": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "isg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "ish": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": 
"Central Plains Edoid" + }, + "isi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Abanyom-Nkem-Nkum" + }, + "isk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Sanglechi-Ishkashimi" + }, + "isl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian", + "level6": "Icelandic-Faroese" + }, + "ism": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic" + }, + "isn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "iso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "isr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "ist": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Dalmatian Romance" + }, + "isu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + 
"level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic" + }, + "ita": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "itb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Itneg" + }, + "itd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu", + "level10": "Tidung" + }, + "ite": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Moreic", + "level3": "Kujubim-More" + }, + "iti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Itneg" + }, + "itk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "itl": { + "level0": "Chukotko-Kamchatkan", + "level1": "Kamchatkan" + }, + "itm": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari", + "level9": "Ibuoro-ItuMbuso" + }, + "itr": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Iteri-Bo" + }, + "its": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri" + }, + "itt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay", + "level9": "Maeng-Northern Kankanay" + }, + "itu": { + "level0": "Bookkeeping" + }, + "itv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "itw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic" + }, + "itx": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "ity": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Masadiit" + }, + "itz": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan" + }, + "ium": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "ivb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", 
+ "level2": "Batanic" + }, + "ivv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Batanic", + "level3": "Yami-Itbayat" + }, + "iwk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "iwm": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Iwamic" + }, + "iwo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok" + }, + "iws": { + "level0": "Sepik", + "level1": "Iwam-Wogamus", + "level2": "Iwamic" + }, + "ixc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan" + }, + "ixl": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Ixilan" + }, + "iya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse" + }, + "iyo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid" + }, + "iyx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West 
Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Laali-Yaa" + }, + "izh": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan" + }, + "izi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "izm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "izr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Izeric" + }, + "jaa": { + "level0": "Arawan", + "level1": "Madi-Madiha" + }, + "jab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "jac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "jad": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "jae": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", 
+ "level7": "North Huon Gulf linkage" + }, + "jaf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Western Tera" + }, + "jah": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian" + }, + "jai": { + "level0": "Bookkeeping" + }, + "jaj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel", + "level11": "Zazao-Blanga" + }, + "jak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "jal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute" + }, + "jam": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Jamaicanic" + }, + "jao": { + "level0": "Pama-Nyungan", + "level1": "Ngarna" + }, + "jap": { + "level0": "Bookkeeping" + }, + "jaq": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Yaqayic" + }, + "jar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", 
+ "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jarawaic" + }, + "jas": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Siraikic" + }, + "jau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Southwest Cenderawasih Bay", + "level6": "Yaur-Yerisiam" + }, + "jav": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jax": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay" + }, + "jay": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu" + }, + "jaz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian" + }, + "jbe": { + "level0": "Bookkeeping" + }, + "jbi": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic" + }, + "jbj": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head", + "level2": "Kemberanic" + }, + "jbk": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "jbm": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall", + "level6": "Kwangic", + "level7": "Vaghat" + }, + "jbn": { + "level0": "Afro-Asiatic", + "level1": "Berber" + }, + "jbo": { + "level0": "Artificial Language" + }, + "jbr": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "jbt": { + "level0": "Nuclear-Macro-Je", + "level1": "Jabuti" + }, + "jbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase", + "level8": "Jibuic" + }, + "jcs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jct": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum", + "level7": "Crimeaic" + }, + "jda": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti", + "level7": "Spiti-Jad" + }, + "jdg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Lasi-Jadgali" + }, + "jdt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Caucasian Tat" + }, + "jeb": { + "level0": "Cahuapanan" + }, 
+ "jee": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Chaurasiya" + }, + "jeh": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang", + "level4": "Jeh-Halang" + }, + "jei": { + "level0": "Yam", + "level1": "Morehead-Maro" + }, + "jek": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jeri" + }, + "jel": { + "level0": "Bulaka River", + "level1": "Jelmek" + }, + "jen": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen", + "level7": "Doso-Dza" + }, + "jer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Boze-Loro" + }, + "jet": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "jeu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla", + "level7": "Unclassified Dangla" + }, + "jgb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North 
Vestigial Suffixes Bantu" + }, + "jgo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "jhi": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek" + }, + "jhs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jia": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Meridional" + }, + "jib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase", + "level8": "Jibuic" + }, + "jic": { + "level0": "Jicaquean" + }, + "jid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "jie": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Unclassified Biu-Mandara" + }, + "jig": { + "level0": "Mirndi" + }, + "jih": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "jii": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Baiso-Jiiddu" + }, + "jil": { + "level0": "Nuclear Trans New 
Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Upper Minjim" + }, + "jim": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan" + }, + "jio": { + "level0": "Tai-Kadai", + "level1": "Hlaic" + }, + "jiq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic" + }, + "jit": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "jiu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Jino" + }, + "jiv": { + "level0": "Chicham", + "level1": "Shuaric", + "level2": "Huambisa-Shuar" + }, + "jiy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Jino" + }, + "jje": { + "level0": "Koreanic" + }, + "jka": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits" + }, + "jkm": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen" + }, + "jko": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "jkp": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Southern Karen" + }, + "jkr": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Koro-Holon" + }, + "jks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jku": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Jaku-Gubi" + }, + "jle": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Buram Hill Chain" + }, + "jls": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "jma": { + "level0": "Dagan", + "level1": "Central Dagan" + }, + "jmb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "jmc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "West Kilimanjaro" + }, + "jmd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin" + }, + "jmi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic" + }, + "jml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "jmn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric", + "level5": "Makuric" + }, + "jmr": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Kamara-Hanga" + }, + "jms": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "jmw": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "jmx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Coicoyan-Metlatonoc" + }, + "jna": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric", + "level5": "Thebor" + }, + "jnd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Bagri-Jandavra" + }, + "jng": { + "level0": "Yangmanic" + }, + "jni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "jnj": { + "level0": "Ta-Ne-Omotic" + }, + 
"jnl": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute", + "level2": "Raute-Rawat" + }, + "jns": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali" + }, + "job": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric", + "level13": "Fuliiru-Vira" + }, + "jod": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "jog": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "jor": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "jos": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign", + "level3": "Levantine-Iraqi Sign" + }, + "jow": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo" + }, + "jpn": { + "level0": "Japonic", + "level1": "Japanesic", + "level2": "Japan-Taiwan Japanese" + }, + "jpr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": 
"Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "jqr": { + "level0": "Aymaran", + "level1": "Tupe" + }, + "jra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "jrr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu", + "level7": "Wurbo" + }, + "jrt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "jru": { + "level0": "Cariban", + "level1": "Opon-Yukpan", + "level2": "Yukpan" + }, + "jsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "jua": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva" + }, + "jub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Wurbo-Wannu" + }, + "juc": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen" + }, + "jud": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "juh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": 
"Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa", + "level8": "Kona" + }, + "jui": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "juk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa" + }, + "jul": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel" + }, + "jum": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun", + "level3": "Southern Burun" + }, + "jun": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "juo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Kororofa", + "level8": "Kona" + }, + "jup": { + "level0": "Naduhup", + "level1": "Eastern Naduhup", + "level2": "Hup-Yuhup" + }, + "jur": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic" + }, + "jus": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "jut": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "South Scandinavian" + }, + "juu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal" + }, + "juw": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo", + "level6": "Jukun", + "level7": "Jibu-Wase" + }, + "juy": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum", + "level3": "Sora-Juray" + }, + "jvd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "jvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese", + "level4": "Global Javanese" + }, + "jwi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Southern Bia", + "level9": "Jwira-Nzima" + }, + "jye": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "Judeo-Muslim Sanaani Arabic" + }, + "jyy": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba" + }, + "kaa": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "kab": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber" + }, + "kac": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Jingpho" + }, + "kad": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau" + }, + "kae": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Northern East Formosan" + }, + "kaf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Kazhouish" + }, + "kag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang", + "level6": "Kajaman-Lahanan" + }, + "kah": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Fer-Gula" + }, + "kai": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic" + }, + "kaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Katabic" + }, + "kak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran", + "level8": "Kalanguya" + }, + "kal": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Inuit", + "level3": "Greenlandic Inuit" + }, + "kam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + 
"level8": "Central Kenya Bantu", + "level9": "Kamba-Dhaisu" + }, + "kan": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid", + "level6": "Nuclear Kannaoid" + }, + "kao": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "kap": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "East Tsezic" + }, + "kaq": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup", + "level5": "Shipibo-Konibo-Kapanawa" + }, + "kas": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Kashmiric" + }, + "kat": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Georgic" + }, + "kav": { + "level0": "Bookkeeping" + }, + "kaw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic" + }, + "kax": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River", + "level4": "Paguic" + }, + "kay": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani" + }, + "kaz": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "kba": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + 
"kbb": { + "level0": "Cariban", + "level1": "Parukotoan" + }, + "kbc": { + "level0": "Guaicuruan" + }, + "kbd": { + "level0": "Abkhaz-Adyge", + "level1": "Circassian" + }, + "kbe": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "kbf": { + "level0": "Bookkeeping" + }, + "kbg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Unclassified Southern Tibetic" + }, + "kbi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Kaptiau-Tarpia" + }, + "kbj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North Vestigial Suffixes Bantu" + }, + "kbk": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Koita-Koiari" + }, + "kbl": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanembuic" + }, + "kbm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage" + }, + "kbn": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic", + "level7": "Kare-Pana" + }, + "kbo": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi", + "level3": "Kalikoic" + }, + "kbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "kbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kbr": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid", + "level2": "South Gonga" + }, + "kbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani" + }, + "kbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage" + }, + "kbu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Unclassified Rajasthani" + }, + "kbv": { + "level0": "Senagi" + }, + "kbw": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage", + "level9": "Kaiep-Terebu" + }, + "kbx": { + "level0": "Keram", + "level1": "East Keram" + }, + "kby": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric" + }, + "kbz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic", + "level6": "Mundat-Karfa" + }, + "kca": { + "level0": "Uralic", + "level1": "Khantyic", + "level2": "Northern Khanty" + }, + "kcb": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic" + }, + "kcc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic", + "level8": "Lubila-Lokaa" + }, + "kcd": { + "level0": "Yam", + "level1": "Kanum", + "level2": "Ngkrn-Ngkantr", + "level3": "Ngkantr" + }, + "kce": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kcf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "kcg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Zaric", + "level6": "Nuclear Zaric", + "level7": "Katabic" + }, + "kch": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + 
"level6": "Gyong-Kamantan" + }, + "kcj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Nyun", + "level4": "Buy" + }, + "kck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Kalanga-Nambya" + }, + "kcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "North Huon Gulf linkage" + }, + "kcm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Fer-Gula" + }, + "kcn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "kco": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman", + "level6": "Kinalakna-Kumukio" + }, + "kcp": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Katcha-Kadugli-Miri-Kanga" + }, + "kcq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Awak-Kamo" + }, + "kcr": { + "level0": "Katla-Tima", + "level1": "Katla-Julud" + }, + "kcs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic", + "level8": "Piapung-Koenoem" + }, + "kct": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Watam-Kaian" + }, + "kcu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu" + }, + "kcv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete", + "level12": "Ruund-Kete" + }, + "kcw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu" + }, + "kcx": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "kcy": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "kcz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "kda": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales 
Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Hunter-Hastings" + }, + "kdc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu", + "level12": "Kutu-Zaramo" + }, + "kdd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "kde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Makonde-Makwe" + }, + "kdf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic", + "level9": "Mamusa-Mengen" + }, + "kdg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "kdh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala" + }, + "kdi": { + "level0": "Nilotic", + "level1": "Western 
Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Lango-Kumam" + }, + "kdj": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "kdk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Extreme Southern New Caledonian" + }, + "kdl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "West Kambaric" + }, + "kdm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Gyong-Kamantan" + }, + "kdn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja" + }, + "kdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Kanufi-Ninkyob-Angan" + }, + "kdq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "kdr": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak" + }, + "kds": { + "level0": "Bookkeeping" + }, + "kdt": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Kuy-Souei" + }, + "kdu": { 
+ "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Eastern Kordofan Nubian" + }, + "kdv": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Chakpa-Kadu-Ganan" + }, + "kdw": { + "level0": "Mombum-Koneraw" + }, + "kdx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo" + }, + "kdy": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "kdz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe", + "level9": "Mfumteic" + }, + "kea": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "keb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Akeleic" + }, + "kec": { + "level0": "Kadugli-Krongo" + }, + "ked": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza" + }, + "kee": { + "level0": "Keresan" + }, + 
"kef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Kpesi-Waci" + }, + "keg": { + "level0": "Temeinic" + }, + "keh": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "kei": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Kei-Fordata" + }, + "kej": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kek": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean" + }, + "kem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor", + "level3": "Kemak-Tukudede" + }, + "ken": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe" + }, + "keo": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian" + }, + "kep": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "keq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "ker": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.3" + }, + "kes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Kugboic" + }, + "ket": { + "level0": "Yeniseian", + "level1": "Northern Yeniseian" + }, + "keu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kebu-Animere" + }, + "kev": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kew": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa" + }, + "kex": { + "level0": "Bookkeeping" + }, + "key": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "kez": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele", + "level8": "Kukele-Uzekwe" + }, + "kfa": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu" + }, + "kfb": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Kolami-Naiki" + }, + "kfc": { + "level0": 
"Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui" + }, + "kfd": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Koraga" + }, + "kfe": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota" + }, + "kff": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi", + "level5": "South Bastar Gondi-Koya" + }, + "kfg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Tuluic" + }, + "kfh": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kfi": { + "level0": "Bookkeeping" + }, + "kfj": { + "level0": "Bookkeeping" + }, + "kfk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "kfl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring" + }, + "kfm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic" + }, + "kfn": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring" + }, + "kfo": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori", + "level10": "Koro-Koyaga" + }, + "kfp": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Kodaku-Korwa" + }, + "kfq": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda" + }, + "kfr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Sindhi-Kachchi" + }, + "kfs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "kft": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "kfu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern 
Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic" + }, + "kfv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Unclassified Gauda-Banga" + }, + "kfw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "kfx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "kfy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Central Pahari" + }, + "kfz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur" + }, + "kga": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori", + "level10": "Koro-Koyaga" + }, + "kgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": 
"Raja Ampat Maya" + }, + "kge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic", + "level3": "Pesisir" + }, + "kgf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu" + }, + "kgh": { + "level0": "Bookkeeping" + }, + "kgi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign" + }, + "kgj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate" + }, + "kgk": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani", + "level9": "Kaiowa" + }, + "kgl": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "kgm": { + "level0": "Bookkeeping" + }, + "kgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Northern Tatic" + }, + "kgo": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Krongo-Tumtum" + }, + "kgp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng", + "level4": "Kaingangic" + }, + "kgq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro" + }, + "kgs": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + 
"level2": "North Coast Pama-Nyungan", + "level3": "Gumbaynggiric" + }, + "kgt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic" + }, + "kgu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Omosan" + }, + "kgv": { + "level0": "West Bomberai" + }, + "kgw": { + "level0": "Maybratic" + }, + "kgx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Wolio-Kamaru" + }, + "kgy": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Gyalsumdo-Nubri-Kyirong" + }, + "kha": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Khasi-Pnar" + }, + "khb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic", + "level12": "Sipsongpannic" + }, + "khc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Tukangbesi-Bonerate", + "level8": "Tukang Besi" + }, + "khd": { + "level0": "Yam", 
+ "level1": "Kanum", + "level2": "Ngkrn-Ngkantr", + "level3": "Ngkantr" + }, + "khe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi" + }, + "khf": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Khmu'" + }, + "khg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Kham-Hor" + }, + "khj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Northern Benue-Congo Plateau", + "level5": "Nuclear Northern Benue-Congo Plateau", + "level6": "Kuturmi-Ajiya" + }, + "khk": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "khl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero", + "level9": "Kaliai-Kove" + }, + "khm": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "khn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Khandesic" + }, + "kho": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi", + "level5": "Saka" + }, + "khq": { + "level0": "Songhay", + "level1": "Northwest Songhay" + }, + "khr": { + "level0": "Austroasiatic", + "level1": "Mundaic" + }, + "khs": { + "level0": "Bosavi", + "level1": "Bosavi 
Watershed" + }, + "kht": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Assam Tai B" + }, + "khu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Nyaneka-Nkhumbi" + }, + "khv": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Tsezic", + "level4": "West Tsezic" + }, + "khw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "khx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Lega", + "level10": "Western Lega" + }, + "khy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "Kele-Lombo" + }, + "khz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + 
"level8": "Sinagoro-Keapara", + "level9": "Hula-Keapara" + }, + "kia": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Kim-Besme-Goundo" + }, + "kib": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic" + }, + "kic": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Fox" + }, + "kid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "kie": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Runga-Kibet" + }, + "kif": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate", + "level6": "Parbate Kham" + }, + "kig": { + "level0": "Kolopom", + "level1": "Kimaama-Riantana" + }, + "kih": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "kii": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai" + }, + "kij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic", + "level9": "Kilivila-Muyuw" + }, + "kik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Gikuyu-Temi" + }, + "kil": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + 
"level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2", + "level7": "Warji-Gala-Kariya" + }, + "kim": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Sayan" + }, + "kin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu" + }, + "kio": { + "level0": "Kiowa-Tanoan" + }, + "kip": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham" + }, + "kiq": { + "level0": "Kaure-Kosare" + }, + "kir": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "East Kipchak" + }, + "kis": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Kis-Wogeo" + }, + "kit": { + "level0": "Pahoturi" + }, + "kiu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Zaza" + }, + "kiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + 
"level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "kiw": { + "level0": "Kiwaian" + }, + "kix": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic", + "level6": "Khiamniungic" + }, + "kiy": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku", + "level3": "Fayu-Kirikiri" + }, + "kiz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kisi-Pangwa" + }, + "kja": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik" + }, + "kjb": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "kjc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Konjo" + }, + "kjd": { + "level0": "Kiwaian" + }, + "kje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Kisaric" + }, + "kjf": { + "level0": "Bookkeeping" + }, + "kjg": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Khmu'" + }, + "kjh": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Yenisey Turkic" + }, + "kji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New 
Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "West Santa Isabel" + }, + "kjj": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian" + }, + "kjk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Konjo" + }, + "kjl": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Kham", + "level5": "Gamale-Parbate", + "level6": "Parbate Kham" + }, + "kjm": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Khao-Bit" + }, + "kjn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Southwestern Alaya-Athima" + }, + "kjo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "kjp": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Eastern-Western Pwo Karen" + }, + "kjq": { + "level0": "Keresan" + }, + "kjr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Eastern Yapen" + }, + "kjs": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa", + "level6": "Southeast Kewa" + }, + "kjt": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Northern Pwo Karen" + }, + "kju": { + "level0": 
"Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Southern Pomoan-Kashaya" + }, + "kjv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "kjx": { + "level0": "North Bougainville" + }, + "kjy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa", + "level4": "Angal-Kewa", + "level5": "Kewa", + "level6": "Southeast Kewa" + }, + "kjz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "kka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa" + }, + "kkb": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "kkc": { + "level0": "East Strickland" + }, + "kkd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kke": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole", + "level8": "Mixiforic" + }, + "kkf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Tshanglic" + }, + "kkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + 
"level7": "Kalinga", + "level8": "Northern Kalinga", + "level9": "Northwest Kalinga" + }, + "kkh": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Yuanic" + }, + "kki": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu" + }, + "kkj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "kkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "Central Santa Isabel" + }, + "kkl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "kkm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Kiong-Korop" + }, + "kkn": { + "level0": "Bookkeeping" + }, + "kko": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "kkp": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Coastal 
Southwest Paman", + "level4": "Dhawa-Kaber" + }, + "kkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic", + "level16": "Bila-Kaiku" + }, + "kkr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic", + "level7": "Kir-Mangas" + }, + "kks": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Giiwo-Daza" + }, + "kkt": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "kku": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "kkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Maduresic" + }, + "kkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "kkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "kky": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic" + }, + "kkz": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "klb": { + "level0": "Cochimi-Yuman", + "level1": "Yuman" + }, + "klc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic" + }, + "kld": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "kle": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu", + "level6": "Kulungic" + }, + "klf": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "klg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan" + }, + "klh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Unclassified Uruwa" + }, + "kli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "klj": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "klk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + 
"level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kll": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan", + "level6": "Kagan-Kalagan" + }, + "klm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "klo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic" + }, + "klp": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic", + "level3": "Kamasa-Susuami" + }, + "klq": { + "level0": "Turama-Kikori" + }, + "klr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Upper Dudhkosi" + }, + "kls": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "klt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa" + }, + "klu": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Klao-Tajuasohn" + }, + "klv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage" + }, + "klw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Kulawi" + }, + "klx": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "kly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Kalao-Laiyolo" + }, + "klz": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "West Alor" + }, + "kma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Buli-Koma" + }, + "kmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "kmc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam", + "level4": "Kamic" + }, + "kmd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga", + "level10": "Southeastern Kalinga" + }, + "kme": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": 
"Sawabantu", + "level8": "Dualaic", + "level9": "Kole-Isubu" + }, + "kmf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso" + }, + "kmg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Kate-Mape" + }, + "kmh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon", + "level4": "Etp-Ti Kalam" + }, + "kmi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa", + "level7": "Abawa", + "level8": "Kami-Gupa" + }, + "kmj": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto", + "level3": "Malto" + }, + "kmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Northern Kalinga" + }, + "kml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga", + "level10": "Southeastern Kalinga" + }, + "kmm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "kmn": { + "level0": "Sepik", + "level1": "Ram" + }, + "kmo": { + "level0": "Sepik", + "level1": "Nukuma" + }, + "kmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Koma Alantika" + }, + "kmq": { + "level0": "Koman" + }, + "kmr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "kms": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "kmt": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic", + "level2": "Mlap-Gresi-Kemtuik", + "level3": "Gresi-Kemtuik" + }, + "kmu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kmv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French", + "level16": "Guyanic Creole French" + }, + "kmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "kmx": { + "level0": "Kiwaian" + }, + 
"kmy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Vere" + }, + "kmz": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "East Oghuz" + }, + "kna": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic" + }, + "knb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga" + }, + "knc": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "knd": { + "level0": "Konda-Yahadian" + }, + "kne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay" + }, + "knf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel" + }, + "kng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC 
Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "knh": { + "level0": "Bookkeeping" + }, + "kni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Kanufi-Ninkyob-Angan" + }, + "knj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan", + "level5": "Kanjobal-Jacaltec" + }, + "knk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole" + }, + "knl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic" + }, + "knm": { + "level0": "Katukinan" + }, + "knn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi", + "level11": "Western Marathi" + }, + "kno": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Vai-Kono" + }, + "knp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": 
"Konja-Mambila-Vute", + "level8": "Konja" + }, + "knq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "knr": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Eastern Sepik Hill" + }, + "kns": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "knt": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Marubo Subgroup" + }, + "knu": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Kpelle" + }, + "knw": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "knx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Western Malayic Dayak" + }, + "kny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic" + }, + "knz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "koa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "kob": { + "level0": "Bookkeeping" + }, + "koc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Unclassified Ngembaic" + }, + "kod": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Kodi-Gaura" + }, + "koe": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Baale-Olam" + }, + "kof": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Gera-Geruma-Kubi-Deno", + "level9": "Kubi-Deno" + }, + "kog": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic" + }, + "koh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)", + "level10": "Koyo-Mboshi" + }, + "koi": { + "level0": "Uralic", + "level1": "Permian", + "level2": "Komi" + }, + "koj": { + "level0": "Bookkeeping" + }, + "koo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Rwenzori" + }, + "kop": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "koq": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Kota-Mahongwe" + }, + "kor": { + "level0": "Koreanic" + }, + "kos": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Kosraean-Nauruan" + }, + "kot": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Central" + }, + "kou": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Bolgo-Koke" + }, + "kov": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Ningic" + }, + "kow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang" + }, + "kox": { + "level0": "Bookkeeping" + }, + "koy": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Koyukonic" + }, + "koz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Kowan" + }, + "kpa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + 
"level8": "Tangale-Kwami-Kupto", + "level9": "Kwami-Kupto" + }, + "kpb": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "kpc": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano", + "level5": "Baniwa-Curripaco" + }, + "kpd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru", + "level4": "Dobel-Koba" + }, + "kpf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum", + "level6": "Selepet-Komba" + }, + "kpg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Carolinean Outlier Polynesian" + }, + "kph": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang" + }, + "kpi": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei", + "level2": "Sauri-Kofei" + }, + "kpj": { + "level0": "Nuclear-Macro-Je" + }, + "kpk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Kpan-Icen" + }, + "kpl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", 
+ "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic", + "level9": "Kpala-Bakpa" + }, + "kpm": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Koho-Maa" + }, + "kpn": { + "level0": "Tupian" + }, + "kpo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Kposo-Ahlo-Bowili" + }, + "kpq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "kpr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean", + "level6": "Gaena-Korafe" + }, + "kps": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "kpt": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic" + }, + "kpu": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar" + }, + "kpv": { + "level0": "Uralic", + "level1": "Permian", + "level2": "Komi" + }, + "kpw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon" + }, + "kpx": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Biage-Mountain Koiali" + }, + "kpy": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic", + "level3": "J-Koryakic" + }, + "kpz": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Elgon-Mau Kalenjin" + }, + "kqa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": 
"Sogeram", + "level5": "North Sogeram" + }, + "kqb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz" + }, + "kqc": { + "level0": "Manubaran" + }, + "kqd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab", + "level12": "Western Trans-Zab" + }, + "kqe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Western Mansakan", + "level6": "Kagan-Kalagan" + }, + "kqf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Kakabai linkage" + }, + "kqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Dogoso-Khe" + }, + "kqh": { + "level0": "Bookkeeping" + }, + "kqi": { + "level0": "Koiarian", + "level1": "Koiaric", + "level2": "Koita-Koiari" + }, + "kqj": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "South Nasioi" + }, + "kqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "kql": { + "level0": "Yuat", + "level1": 
"Miyak-Bun-Biwat" + }, + "kqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kaansa-Dogose", + "level7": "Dogose-Khisa" + }, + "kqn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "kqo": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "kqp": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2", + "level6": "Gabri-Kimre" + }, + "kqq": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum" + }, + "kqr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "kqs": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Kissi" + }, + "kqt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "kqu": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Eastern !Ui" + }, + "kqv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic" + }, + "kqw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Kandas-Duke of York" + }, + "kqx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Central" + }, + "kqy": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "kqz": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "South Khoekhoe" + }, + "kra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Unclassified Bihari" + }, + "krb": { + "level0": "Miwok-Costanoan", + "level1": "Costanoan" + }, + "krc": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Kaukasus Kipchak" + }, + "krd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "kre": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz" + }, + "krf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Koto-Olrat-Lakon" + }, + "krg": { + "level0": "Bookkeeping" + }, + "krh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": 
"Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru" + }, + "kri": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English" + }, + "krj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kinarayan" + }, + "krk": { + "level0": "Chukotko-Kamchatkan", + "level1": "Chukotian", + "level2": "R-Koryakic", + "level3": "J-Koryakic" + }, + "krl": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan" + }, + "krn": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "krp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Kiong-Korop" + }, + "krs": { + "level0": "Kresh-Aja", + "level1": "Kreshic" + }, + "krt": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanuric", + "level4": "East Kanuri" + }, + "kru": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto" + }, + "krw": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "krx": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + 
"level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp", + "level9": "Karon-Mlomp" + }, + "kry": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Southern Samur" + }, + "krz": { + "level0": "Yam", + "level1": "Kanum", + "level2": "Ngkrn-Ngkantr" + }, + "ksa": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "ksb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Bondei-Shambala" + }, + "ksc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga" + }, + "ksd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "kse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan 
linkage" + }, + "ksf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)", + "level9": "Lefa-Bafia" + }, + "ksg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic", + "level12": "Hoava-Kusaghe" + }, + "ksh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian", + "level9": "Ripuarian" + }, + "ksi": { + "level0": "Sko" + }, + "ksj": { + "level0": "Kwalean", + "level1": "Humene-Kwale" + }, + "ksk": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha", + "level3": "Osage-Kansa" + }, + "ksl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Dambi-Kumaru" + }, + "ksm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang" + }, + "ksn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + 
"level3": "Central Philippine", + "level4": "Tagalogic" + }, + "kso": { + "level0": "Bookkeeping" + }, + "ksp": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone", + "level7": "Gore" + }, + "ksq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Tangale-Kwami-Kupto", + "level9": "Kwami-Kupto" + }, + "ksr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Pindiu", + "level6": "Kosorong-Burum-Mindik" + }, + "kss": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Southern Mel", + "level3": "Kissi" + }, + "kst": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi" + }, + "ksu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Assam Tai B" + }, + "ksv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "ksw": { + 
"level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Southern Karen", + "level3": "Sgaw" + }, + "ksx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata" + }, + "ksy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "ksz": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Kodaku-Korwa" + }, + "kta": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric" + }, + "ktb": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Kambaataic" + }, + "ktc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "ktd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "kte": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Gyalsumdo-Nubri-Kyirong" + }, + "ktf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater 
Lega", + "level8": "Unclassified Greater Lega" + }, + "ktg": { + "level0": "Pama-Nyungan", + "level1": "Kalkatungic" + }, + "kth": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "kti": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "ktj": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo", + "level6": "Tepo-Plapo" + }, + "ktk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands", + "level6": "Anchorite" + }, + "ktl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Balochic", + "level8": "Southern-Western Balochi", + "level9": "Southern Balochi-Koroshi" + }, + "ktm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere", + "level9": "Kurti-Elu" + }, + "ktn": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Arikemic" + }, + "ktp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "ktq": { + "level0": "Unclassifiable" + }, + "kts": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": 
"Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "ktt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Ketum-Wambon" + }, + "ktu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "ktv": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu", + "level3": "Nuclear Katu" + }, + "ktw": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "ktx": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano" + }, + "kty": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian" + }, + "ktz": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "kua": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)" + }, + "kub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid" + }, + "kuc": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor" + }, + "kud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "kue": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Kuman-Dom-Gunaa" + }, + "kuf": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu", + "level3": "Nuclear Katu" + }, + "kug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid", + "level6": "Dibo-Kupa" + }, + "kuh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic" + }, + "kui": { + "level0": "Cariban", + "level1": "Kuikuroan", + "level2": "Nuclear Kuikuroan" + }, + "kuj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Kuriaic" + }, + 
"kuk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "kul": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "kum": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Kaukasus Kipchak" + }, + "kuo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman", + "level6": "Kinalakna-Kumukio" + }, + "kup": { + "level0": "Kunimaipan" + }, + "kuq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva" + }, + "kus": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta" + }, + "kuu": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic" + }, + "kuv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Teor-Kur" + }, + "kuw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North 
Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "kux": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic" + }, + "kuy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "kva": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Bagvalal-Tindi" + }, + "kvb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay" + }, + "kvc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Eastern Ngero", + "level9": "Kaliai-Kove" + }, + "kvd": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor" + }, + "kve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic" + }, + "kvf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2" + }, + "kvg": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Boazi" + }, + "kvh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": 
"Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "kvi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.3" + }, + "kvj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Higic" + }, + "kvk": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "kvl": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayaw-Manu" + }, + "kvm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Mamfe", + "level6": "Kendem-Denya" + }, + "kvn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Kuna" + }, + "kvo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru", + "level4": "Dobel-Koba" + }, + "kvp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane", + "level4": "Kola-Kompane" + }, + "kvq": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Geba-Bwe" + }, + "kvr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau" + }, + "kvs": { + "level0": "Bookkeeping" + }, + "kvt": { + "level0": "Bookkeeping" + }, + "kvu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "kvv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane", + "level4": "Kola-Kompane" + }, + "kvw": { + "level0": "Timor-Alor-Pantar", + 
"level1": "Alor-Pantar", + "level2": "East Alor", + "level3": "Sawila-Wersing" + }, + "kvx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "kvy": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale" + }, + "kvz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi", + "level5": "Tsakwambo-Komyandaret" + }, + "kwa": { + "level0": "Naduhup", + "level1": "Eastern Naduhup" + }, + "kwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian" + }, + "kwc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Likwala-Likuba" + }, + "kwd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "kwe": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "kwf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "kwg": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "kwh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu" + }, + "kwi": { + "level0": "Barbacoan", + "level1": "Awa-Southern Barbacoan" + }, + "kwj": { + "level0": "Sepik", + "level1": "Nukuma", + "level2": "Kwanga-Mende" + }, + "kwk": { + "level0": "Wakashan", + "level1": "Northern Wakashan", + "level2": "Kwakiutlan" + }, + "kwl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "kwm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Kwambi-Ndonga" + }, + "kwn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Kwangali-Diriku" + }, + "kwo": { + "level0": "Kwomtari-Nai" + }, + "kwp": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Unclassified 
Eastern Kru" + }, + "kwq": { + "level0": "Bookkeeping" + }, + "kwr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok", + "level6": "Kwer-Burumakok" + }, + "kws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "kwt": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "kwu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "kwv": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "kww": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons", + "level14": "Ndyuka" + }, + "kwx": { + "level0": "Dravidian", + "level1": "Unclassified Dravidian" + }, + "kwy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + 
"level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo" + }, + "kwz": { + "level0": "Khoe-Kwadi" + }, + "kxa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage" + }, + "kxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano" + }, + "kxc": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Konsoid" + }, + "kxd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay", + "level9": "Brunei-Bacan Malay" + }, + "kxe": { + "level0": "Bookkeeping" + }, + "kxf": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayaw-Manu" + }, + "kxg": { + "level0": "Bookkeeping" + }, + "kxh": { + "level0": "South Omotic", + "level1": "AHK", + "level2": "Hamer-Karo" + }, + "kxi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + 
"level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "kxj": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Koulfaic" + }, + "kxk": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "kxm": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "kxn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau", + "level6": "Sibu-Kanowit-Tanjong" + }, + "kxp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "kxq": { + "level0": "Yam", + "level1": "Kanum" + }, + "kxr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "kxs": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "kxt": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic" + }, + "kxu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Kui-Kuvi" + }, + "kxv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + 
"level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Kui-Kuvi" + }, + "kxw": { + "level0": "East Strickland" + }, + "kxx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Likwala-Likuba" + }, + "kxy": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Kayong-Jeh-Halang" + }, + "kxz": { + "level0": "Kiwaian", + "level1": "Turama-Kerewo" + }, + "kya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "kyb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": "Kalinga", + "level8": "Central and South Kalinga", + "level9": "South Kalinga" + }, + "kyc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "kyd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Karey-Barakai" + }, + "kye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang" + }, + "kyf": { + "level0": 
"Kru", + "level1": "Eastern Kru", + "level2": "Beteic", + "level3": "Eastern Bete" + }, + "kyg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "kyi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram A" + }, + "kyj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran", + "level7": "Nuclear Southern Cordilleran" + }, + "kyk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan" + }, + "kyl": { + "level0": "Kalapuyan" + }, + "kym": { + "level0": "Bookkeeping" + }, + "kyn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Negrosanon" + }, + "kyo": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar" + }, + "kyp": { + "level0": "Bookkeeping" + }, + "kyq": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic" + }, + "kyr": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Mundurukuic" + }, + "kys": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "kyt": { + 
"level0": "Kayagaric", + "level1": "Kaygir-Tamagario" + }, + "kyu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Central Karen", + "level3": "Kayah-Yintale", + "level4": "Kayah" + }, + "kyv": { + "level0": "Bookkeeping" + }, + "kyw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic" + }, + "kyx": { + "level0": "North Bougainville" + }, + "kyy": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Unclassified Kainantu" + }, + "kyz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI" + }, + "kza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "Karaboro" + }, + "kzb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits" + }, + "kzc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Kulango" + }, + "kzd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Taliaboic" + }, + "kze": { + "level0": "Bookkeeping" + }, + "kzf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, 
+ "kzg": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami" + }, + "kzh": { + "level0": "Nubian", + "level1": "Nile Nubian" + }, + "kzi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "kzk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia" + }, + "kzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku" + }, + "kzm": { + "level0": "South Bird's Head Family" + }, + "kzn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Lolo-Kokola" + }, + "kzo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "kzp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, 
+ "kzq": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic" + }, + "kzr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic" + }, + "kzs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok", + "level8": "Sugut-Minokok Kadazan" + }, + "kzu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay", + "level8": "Eastern Jayapura Bay" + }, + "kzv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Becking-Dawi", + "level5": "Tsakwambo-Komyandaret" + }, + "kzw": { + "level0": "Unclassifiable" + }, + "kzx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase" + }, + "kzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic", + "level15": "Bilaic" + }, + "kzz": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "laa": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen" + }, + "lac": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan", + "level4": "Yucatec-Lacandon" + }, + "lad": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "lae": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Lahaulic" + }, + "lag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Mbugwe-Langi" + }, + "lai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya" + }, + "laj": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Lango-Kumam" + }, + "lam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "lan": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji" + }, + "lao": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "lap": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone", + "level7": "Gore" + }, + "laq": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra" + }, + "lar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "South Guang", + "level8": "Hill South Guang" + }, + "las": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Kabiyeic" + }, + "lat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin" + }, + "lau": { + "level0": "Bookkeeping" + }, + "lav": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Eastern Baltic" + }, + "law": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "lax": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro" + }, + "lay": { + 
"level0": "Bookkeeping" + }, + "laz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "lba": { + "level0": "Bookkeeping" + }, + "lbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Label-Bilur" + }, + "lbc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Lakkia-Biao" + }, + "lbe": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian" + }, + "lbf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Lahaulic" + }, + "lbg": { + "level0": "Bookkeeping" + }, + "lbi": { + "level0": "Speech Register", + "level1": "Atlantic-Congo Speech Register" + }, + "lbj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "lbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Bontok" + }, + "lbm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": 
"Gauda-Banga" + }, + "lbn": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic" + }, + "lbo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric", + "level4": "Loven-Suq" + }, + "lbq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham" + }, + "lbr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu" + }, + "lbs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "lbt": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Lachic" + }, + "lbu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham" + }, + "lbv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "lbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "lbx": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North East Greater Barito" + }, + "lby": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic", + "level3": "Coastal Lamalamic" + }, + "lbz": { + "level0": "Tangkic" + }, + "lcc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "lcd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "lce": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Bangka-Belitung Malay" + }, + "lcf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "lch": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "lcl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "lcm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest 
Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lcp": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Lawa" + }, + "lcq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "West Hoamoal" + }, + "lcs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "ldb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic", + "level7": "Duyaic" + }, + "ldd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri" + }, + "ldg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo", + "level8": "Legboic", + "level9": "Lenyima-Leyigha" + }, + "ldh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Taram-Dirim-Nnakenyare", + "level7": "Dirim-Nnakenyare" + }, + "ldi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": 
"Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "ldj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga", + "level11": "Lemoro-Sanga" + }, + "ldk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Bambuka-Gomu-Leelau" + }, + "ldl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi" + }, + "ldm": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "ldn": { + "level0": "Artificial Language" + }, + "ldo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Burak-Loo" + }, + "ldp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic" + }, + "ldq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic", + "level5": "Bete-Lufu" + }, + "lea": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Lega", + "level10": "Western Lega" + }, + "leb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi", + "level9": "Central Sabi", + "level10": "Bisa-Lamba (M.50)" + }, + "led": { + "level0": "Central Sudanic", + "level1": "Lenduic", + "level2": "Bale" + }, + "lee": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "lef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Lelemi-Akpafu" + }, + "leh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "lei": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "lej": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya" + }, + "lek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": 
"Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "lel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "lem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "leo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "Sanaga (A.60)" + }, + "lep": { + "level0": "Sino-Tibetan", + "level1": "Himalayish" + }, + "leq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan" + }, + "ler": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "les": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "let": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + 
"level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "East Arawe" + }, + "leu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lev": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar" + }, + "lew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "lex": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Luangic" + }, + "ley": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola" + }, + "lez": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi", + "level6": "Aghul-Lezgi" + }, + "lfa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)", + "level8": "Nuclear Bafia (A.50)", + "level9": "Lefa-Bafia" + }, + "lfn": { + "level0": "Artificial Language" + }, + "lga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic 
linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic", + "level12": "Ghanongga-Lungga" + }, + "lgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "West Santa Isabel" + }, + "lgg": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "lgh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji" + }, + "lgi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "lgk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita" + }, + "lgm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", 
+ "level9": "Lega" + }, + "lgn": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Dana-Opo" + }, + "lgq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo" + }, + "lgr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "lgs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lgt": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mehek-Pahi" + }, + "lgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira" + }, + "lgz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ngombe-Genja" + }, + "lha": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Southern Kra" + }, + "lhh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Central Ambon" + }, + "lhi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lhl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + 
"level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic", + "level9": "Bhadrawahi-Bhalesi-Curahi", + "level10": "Bhadarwahic", + "level11": "Chinali-Lahul Lohar" + }, + "lhm": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "lhn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang", + "level6": "Kajaman-Lahanan" + }, + "lhp": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "lhs": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Turoyo-Mlahso" + }, + "lht": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Hiw-Lo-Toga" + }, + "lhu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lia": { + "level0": "Atlantic-Congo", + "level1": "Limba" + }, + "lib": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II", + "level9": "Likum-Levei" + }, + "lic": { + "level0": 
"Tai-Kadai", + "level1": "Hlaic", + "level2": "Nuclear Hlaic" + }, + "lid": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "lie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Ngiri" + }, + "lif": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar" + }, + "lig": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jogo" + }, + "lih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "lij": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "lik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian" + }, + "lil": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish" + }, + "lim": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian", + "level9": "Ripuarian" + }, + "lin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain", + "level14": "Bobangi-Bangala-Lingala", + "level15": "Lingala-Bangala" + }, + "lio": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic", + "level9": "Sobei-Liki" + }, + "lip": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Likpe-Santrokofi" + }, + "liq": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Hadiyya-Kambaata", + "level6": "Hadiyyaic" + 
}, + "lir": { + "level0": "Pidgin", + "level1": "English-based pidgin" + }, + "lis": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu" + }, + "lit": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Eastern Baltic" + }, + "liu": { + "level0": "Dajuic", + "level1": "Eastern Dajuic" + }, + "liv": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic" + }, + "liw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay", + "level6": "Music" + }, + "lix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "liy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "liz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Ngiri" + }, + "lje": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + 
"level3": "Rampi-Seko-Badaic" + }, + "lji": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Kalao-Laiyolo" + }, + "ljl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Ende-Lio" + }, + "ljp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Lampungic", + "level3": "Pesisir" + }, + "ljw": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric" + }, + "ljx": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "lka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Lakalei-Idate" + }, + "lkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "lkc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Lahoid" + }, + "lkd": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "lke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", 
+ "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza", + "level11": "Soga-Kenyi" + }, + "lkh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "lki": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish" + }, + "lkj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang", + "level7": "Northern Iban" + }, + "lkl": { + "level0": "Nuclear Torricelli" + }, + "lkm": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + "lkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Koto-Olrat-Lakon" + }, + "lko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Marachi-Khayo" + }, + "lkr": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "lks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lkt": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Sioux" + }, + "lku": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Pirriya-Kungkari" + }, + "lky": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lotuko-Lokoya" + }, + "lla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena" + }, + "llb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic", + "level9": "Lolo-Kokola" + }, + "llc": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole" + }, + "lld": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "lle": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty 
Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "llf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "llg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote" + }, + "llh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Unclassified Lisoid" + }, + "lli": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Laali-Yaa" + }, + "llk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram B" + }, + "lll": { + "level0": "Bogia" + }, + "llm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic", + "level9": 
"East Buton" + }, + "lln": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 1" + }, + "llo": { + "level0": "Bookkeeping" + }, + "llp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "North Efatic" + }, + "llq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "lls": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic" + }, + "llu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "llx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian" + }, + "lma": { + "level0": "Atlantic-Congo", + "level1": "Limba" + }, + "lmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "lmc": { + "level0": "Limilngan-Wulna" + }, + "lmd": { + "level0": "Narrow Talodi", + "level1": "Lumun-Torona" + }, + "lme": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", 
+ "level4": "Peveic" + }, + "lmf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah", + "level5": "Southeast Lembata" + }, + "lmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Bibling" + }, + "lmh": { + "level0": "Bookkeeping" + }, + "lmi": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua" + }, + "lmj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah" + }, + "lmk": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Anal-Lamgang" + }, + "lml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu" + }, + "lmm": { + "level0": "Bookkeeping" + }, + "lmn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani" + }, + "lmo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern 
Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Piemontese-Lombard" + }, + "lmp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "lmq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Timur" + }, + "lmr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat" + }, + "lms": { + "level0": "Bookkeeping" + }, + "lmt": { + "level0": "Bookkeeping" + }, + "lmu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Lamenu-Lewo" + }, + "lmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Eastern Fijian", + "level7": "Nuclear Eastern Fijian", + "level8": "Viwa-Lomaiviti-East Viti Levu" + }, + "lmw": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Western Miwokan" + }, + "lmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring" + }, + "lmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Wewewa-Laboya" + }, + "lmz": { + 
"level0": "Unattested" + }, + "lna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Ngbugu-Langbasi" + }, + "lnb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)" + }, + "lnc": { + "level0": "Bookkeeping" + }, + "lnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "lnh": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam", + "level6": "Lanohic" + }, + "lni": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "South Nasioi" + }, + "lnj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Linngithigh-Alngith" + }, + "lnl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Ngbugu-Langbasi" + }, + "lnm": { + "level0": "Keram", + "level1": "Ulmapo", + "level2": "Mwakai-Pondi" + }, + "lnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "Shark Bayic" + }, + "lno": { + "level0": 
"Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo" + }, + "lns": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring" + }, + "lnu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda" + }, + "loa": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Galela-Loloda" + }, + "lob": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Lobiri-Jaane" + }, + "loc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan" + }, + "loe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic", + "level8": "Batui-Saluan" + }, + "lof": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol" + }, + "log": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "loh": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle", + "level4": "Didinga-Longarim" + }, + "loi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Teenic" + }, + "loj": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty", + "level7": "Lou-Paluai" + }, + "lok": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Mende-Loko" + }, + "lol": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Lomongo" + }, + "lom": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Loma" + }, + "lon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe", + "level9": "Lomweic" + }, + "loo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "Kele-Lombo" + }, + "lop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake", + "level5": "Upper Niger Kainji", + "level6": "Oleran" + }, + "loq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Interieur", + "level12": "Lobalic" + }, + "lor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Teenic" + }, + "los": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "Mokoreng-Loniu" + }, + "lot": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lotuko-Lokoya" + }, + "lou": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Circum-Caribbean French" + }, + "lov": { + "level0": "Bookkeeping" + }, + "low": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "lox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "loy": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early 
Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Mustangic" + }, + "loz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana", + "level13": "Sesotho-Lozi" + }, + "lpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Efate", + "level8": "North Efatic" + }, + "lpe": { + "level0": "Lepki-Murkim-Kembra" + }, + "lpn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric", + "level5": "Makuric" + }, + "lpo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Lipo-Micha" + }, + "lpx": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono" + }, + "lra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'", + "level5": "Rara-Sara Bakati'" + }, + "lrc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": 
"Luric-Dezfulic", + "level8": "Luric" + }, + "lre": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "lri": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Marachi-Khayo" + }, + "lrk": { + "level0": "Bookkeeping" + }, + "lrl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "lrm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lrn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "lro": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol", + "level4": "Ebang-Laru" + }, + "lrr": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu", + "level7": "Yamphuic" + }, + "lrt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear 
Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay" + }, + "lrv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lrz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Lemerig-Veraa" + }, + "lsa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "lsc": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Northwestern Jewish Neo-Aramaic" + }, + "lse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri" + }, + "lsg": { + "level0": "Bookkeeping" + }, + "lsh": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Chug-Lish" + }, + "lsi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": 
"Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic", + "level6": "Leqic" + }, + "lsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Saamiaic" + }, + "lsn": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lso": { + "level0": "Bookkeeping" + }, + "lsp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Honduras-Panama Sign" + }, + "lsr": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Bragat-Aruop-Amol" + }, + "lss": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Lasi-Jadgali" + }, + "lst": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "lsv": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsw": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "lsy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ltc": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic" + }, + "lti": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Luangic" + }, + "ltn": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "lto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kisa-Marama-Tsotso" + }, + "lts": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "ltu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Saparua-Latu" + }, + "ltz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Middle Franconian" + }, + "lua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Lubaic", + "level10": "Bangubangu-Kasai" + }, + "lub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "luc": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi" + }, + "lud": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "lue": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Chokwe-Lwena" + }, + "luf": { + "level0": "Mailuan" + }, + "lug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "lui": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Cupan" + }, + "luj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "luk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early 
Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "lul": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "lum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "lun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete" + }, + "luo": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Southern Lwoo", + "level4": "Adhola-Alur-Luo", + "level5": "Adhola-Luo" + }, + "lup": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Lumbu-Bwisi" + }, + "luq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba", + "level10": "Lucumi-Yoruba" + }, + "lus": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic" + }, + "lut": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Lushootseed-Puget" + }, + "luu": { + "level0": "Bookkeeping" + }, + "luv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "luw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "luz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Luric-Dezfulic", + "level8": "Luric", + "level9": "Bakhtiari-Southern Lori" + }, + "lva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku" + }, + "lvi": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric" + }, + "lvl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric" + }, + "lvu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Tengah", + "level5": "Southeast Lembata" + }, + "lwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Mbagani-Lwalwa" + }, + "lwe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Timur" + }, + "lwg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia" + }, + "lwh": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Western Kra", + "level4": "Lachic" + }, + "lwl": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Lawa" + }, + "lwm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "lwo": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo", + "level4": "Luwo-Thuri" + }, + "lws": { + "level0": "Sign Language", + "level1": "L1 
Sign Language" + }, + "lwt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "Flores Lamaholot" + }, + "lwu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lawoish" + }, + "lww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Lamenu-Lewo" + }, + "lxm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "lya": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic", + "level8": "Nuclear Dzongkhic" + }, + "lyg": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Lyngngamic" + }, + "lyn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Eastern Greater Luyana" + }, + "lzh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic" + }, + "lzl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "lzn": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic" + }, + "lzz": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Zan" + }, + "maa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Northwest Alta Mazatec" + }, + "mab": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "mad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Maduresic" + }, + "mae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau" + }, + "maf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Matakam" + }, + "mag": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + }, + "mah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian" + }, + "mai": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan" + }, + "maj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec" + }, + "mak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric", + "level5": "Nuclear Makassaric" + }, + "mal": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mam": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Mamean" + }, + "maq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan" + }, + "mar": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "mas": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa", + "level5": "Nuclear Maa" + }, + "mat": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Matlatzincan" + }, + "mau": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Central Mazatec" + }, + "mav": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani" + }, + "maw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Southeast Western Oti-Volta", + "level13": "Mampruli-Dagbani" + }, + "max": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "maz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Mazahua" + }, + "mba": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon", + "level6": "Bukidnon" + }, + "mbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo", + "level7": "WBM-Livunganen-Ilianen" + }, + "mbc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan" + }, + "mbd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": 
"Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "mbf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "mbg": { + "level0": "Bookkeeping" + }, + "mbh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Bebeli-Mangseng" + }, + "mbi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo", + "level7": "WBM-Livunganen-Ilianen" + }, + "mbj": { + "level0": "Naduhup" + }, + "mbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "mbl": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian" + }, + "mbn": { + "level0": "Guahiboan", + "level1": "Nuclear Guahiboan" + }, + "mbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba", + "level9": "Bafaw-Balong-Manenguba", + "level10": 
"Manenguba" + }, + "mbp": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Northern Magdalenic", + "level4": "Arhuacic", + "level5": "Eastern-Southern Arhuacic", + "level6": "Eastern Arhuacic" + }, + "mbq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage" + }, + "mbr": { + "level0": "Kakua-Nukak" + }, + "mbs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo", + "level6": "Sarangani-Tasaday-Cotabato" + }, + "mbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "Central Manobo" + }, + "mbu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan", + "level7": "Numan" + }, + "mbv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "mbw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi" + }, + "mbx": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "mby": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan 
Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "mbz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec" + }, + "mca": { + "level0": "Mataguayan", + "level1": "Mataguayo I" + }, + "mcb": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Matsi-Nan" + }, + "mcc": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "mcd": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "mce": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mcf": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "mcg": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "mch": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Maquiritari-Wayumara" + }, + "mci": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Sankwep" + }, + "mcj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + 
"level10": "Mbongno-Mvano", + "level11": "Mvano-Ndunda" + }, + "mck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "mcl": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya", + "level4": "Sionan" + }, + "mcm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "mcn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Masa-Gizey-Ham" + }, + "mco": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mcp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "North-Central Makaaic" + }, + "mcq": { + "level0": "Koiarian", + "level1": "Baraic" + }, + "mcr": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Kapau-Menya" + }, + "mcs": { 
+ "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai" + }, + "mcu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Eastern Mambila" + }, + "mcv": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua" + }, + "mcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3" + }, + "mcx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Mpiemo-Ukhwejo" + }, + "mcy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "mcz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "mda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic", + "level6": "Mada-Ninzam" + }, + "mdb": { + "level0": "Kiwaian", + "level1": "Turama-Kerewo" + }, + "mdc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": 
"Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Inland Minjim" + }, + "mdd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic" + }, + "mde": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "mdf": { + "level0": "Uralic", + "level1": "Mordvin" + }, + "mdg": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "mdh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "mdi": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe" + }, + "mdj": { + "level0": "Central Sudanic", + "level1": "Mangbetu-Asua", + "level2": "Mangbetuic" + }, + "mdk": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe" + }, + "mdl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "mdm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka", + "level7": "Mayogo-Bangba" + }, + "mdn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "mdo": { + "level0": "Bookkeeping" + }, + "mdp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + 
"level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Mbala-Sondi" + }, + "mdq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke" + }, + "mdr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi" + }, + "mds": { + "level0": "Manubaran" + }, + "mdt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "mdu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)" + }, + "mdv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Atatlahuca-Monteverde" + }, + "mdw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", 
+ "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Mboshi (C.20)", + "level10": "Koyo-Mboshi" + }, + "mdx": { + "level0": "Dizoid" + }, + "mdy": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo" + }, + "mdz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "mea": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields", + "level8": "Menka-Atong" + }, + "meb": { + "level0": "Turama-Kikori", + "level1": "Turama-Omatian" + }, + "mec": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "med": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Melpa-Tembagla" + }, + "mee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic", + "level9": "Mamusa-Mengen" + }, + "mef": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Lyngngamic" + }, + "meh": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec" + }, + "mei": { + "level0": "Nubian" + }, + "mej": { + "level0": "East Bird's 
Head", + "level1": "Meax" + }, + "mek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "mel": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau" + }, + "mem": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "men": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Mende-Loko" + }, + "meo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "mep": { + "level0": "Jarrakan", + "level1": "Miriwunic" + }, + "meq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Meri", + "level7": "Dugwor-Merey" + }, + "mer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga", + "level11": "Nithi-Meru" + }, + "mes": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": 
"Mubic" + }, + "met": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya" + }, + "meu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage" + }, + "mev": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan" + }, + "mew": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "mey": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "mez": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian" + }, + "mfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "mfb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Bangka-Belitung Malay" + }, + "mfc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + 
"level6": "Ndunga-Mba-Dongo", + "level7": "Ndunga-Mba" + }, + "mfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "mfe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French", + "level15": "Isle-de-France Creole" + }, + "mff": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Bebe-Kemezung", + "level8": "Naki-Kemezung", + "level9": "Nakic" + }, + "mfg": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Nuclear Mokole", + "level8": "Mixiforic" + }, + "mfh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Podoko" + }, + "mfi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Wandala-Malgwa-Glavda", + "level7": "Wandala-Malgwa" + }, + "mfj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + 
"level3": "South Biu-Mandara", + "level4": "Matakam", + "level5": "Mefele-Cuvok" + }, + "mfk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Mofu" + }, + "mfl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Buraic" + }, + "mfm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic", + "level7": "Kilba-South Margi" + }, + "mfn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Mbembe-Legbo" + }, + "mfo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe" + }, + "mfp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "mfq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Gourmantche-Moba", + "level14": "Moba-Bimoba" + }, + "mfr": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "mfs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + 
"level2": "LSFic" + }, + "mft": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "Mokoreng-Loniu" + }, + "mfu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba", + "level12": "Mbwela-Mbunda" + }, + "mfv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel", + "level6": "Cur-Bok-Cotier" + }, + "mfw": { + "level0": "Kwalean" + }, + "mfx": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "mfy": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Cahitan" + }, + "mfz": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Burun", + "level3": "Southern Burun" + }, + "mgb": { + "level0": "Tamaic" + }, + "mgc": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "mgd": { + "level0": "Central Sudanic", + "level1": "Moru-Madi" + }, + "mge": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Bediondo" + }, + "mgf": { + "level0": "Bulaka River" + }, + "mgg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic" + }, + "mgh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "mgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Jilic" + }, + "mgj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Unclassified Central Delta" + }, + "mgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "mgm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor" + }, + "mgn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "mgo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Widikum-Tadkon" + }, + "mgp": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Magar" + }, + "mgq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mbeya" + }, + "mgr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "mgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni" + }, + "mgt": { + "level0": "Keram", + "level1": "Ulmapo", + "level2": "Mwakai-Pondi" + }, + "mgu": { + "level0": "Mailuan" + }, + "mgv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic" + }, + "mgw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matumbic" + }, + "mgx": { + "level0": "Bookkeeping" + }, + "mgy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic" + }, + "mgz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Mbugwe-Langi" + }, + "mha": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Manda-Pengo" + }, + "mhb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Kota-Mahongwe" + }, + "mhc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Kanjobalan" + }, + "mhd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta", + "level10": "Pareic" + }, + "mhe": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian" + }, + "mhf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Finungwan-Mamaa-Gusan" + }, + "mhg": { + "level0": "Marrku-Wurrugu" + }, + "mhh": { + "level0": "Bookkeeping" + }, + "mhi": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "mhj": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "mhk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Mbam-Nkam Nun" + }, + "mhl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater 
Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "mhm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "mhn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Bairisch", + "level10": "Global South Bavarian" + }, + "mho": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Mashi-Mbukushi" + }, + "mhp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "mhq": { + "level0": "Siouan" + }, + "mhr": { + "level0": "Uralic", + "level1": "Mari" + }, + "mhs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru", + "level4": "Buruic" + }, + "mht": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Cassiquiare" + }, + "mhu": { + "level0": "Sino-Tibetan", + "level1": "Digarish" + }, + "mhv": { + "level0": "Bookkeeping" + }, + "mhw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western 
Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Mashi-Mbukushi" + }, + "mhx": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Northern Burmish", + "level5": "Maruic" + }, + "mhy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Maanyan-Paku" + }, + "mhz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "mia": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian" + }, + "mib": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Atatlahuca-Monteverde" + }, + "mic": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Micmacic" + }, + "mid": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Macro-Mandaic" + }, + "mie": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec" + }, + "mif": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + 
"level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Mofu" + }, + "mig": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mih": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mii": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "mij": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "mik": { + "level0": "Muskogean" + }, + "mil": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Tlazoyal-Penoles" + }, + "mim": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec" + }, + "min": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "mio": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + 
"level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mip": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northeastern Alta Mixtec" + }, + "miq": { + "level0": "Misumalpan" + }, + "mir": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mit": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "miu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Tezoatlanic" + }, + "miv": { + "level0": "Bookkeeping" + }, + "miw": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Ankave-Tainae-Akoye", + "level3": "Tainae-Akoye" + }, + "mix": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Mixtepec-Yucunicoco Mixtec" + }, + "miy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec", + "level8": "Southwestern Guerrero Mixtec" + }, + "miz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Alta Mixtec" + }, + "mja": 
{ + "level0": "Bookkeeping" + }, + "mjc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mjd": { + "level0": "Maiduan" + }, + "mje": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic" + }, + "mjg": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Monguoric" + }, + "mjh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Nyanjaic" + }, + "mji": { + "level0": "Hmong-Mien", + "level1": "Mienic", + "level2": "Mien-Mun" + }, + "mjj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "mjk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "mjl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali" + }, + "mjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage" + }, + "mjn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Bwana-Moam-Tapen" + }, + "mjo": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjp": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "mjq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "mjs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip" + }, + "mjt": { + "level0": "Dravidian", + "level1": "North Dravidian", + "level2": "Kurux-Malto", + "level3": "Malto" + }, + "mjv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": 
"Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Muthuvan-Mannan" + }, + "mjw": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Karbic" + }, + "mjx": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "mjy": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Mahican-Woronoco-Pojassick" + }, + "mjz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "mka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Unclassified Volta-Congo" + }, + "mkb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "mkc": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai" + }, + "mkd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Eastern South Slavic", + "level6": "Macedo-Bulgarian" + }, + "mke": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": 
"Midlands Indo-Aryan", + "level7": "Bhil" + }, + "mkf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2" + }, + "mkg": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Mak-Ai-Cham" + }, + "mki": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "mkj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "mkk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "North-Central Makaaic" + }, + "mkl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede" + }, + "mkm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Moken-Moklen" + }, + "mkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + 
"level6": "Eastern Indonesia Trade Malay" + }, + "mko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen", + "level7": "Doso-Dza" + }, + "mkp": { + "level0": "Yareban", + "level1": "Doriri-Abia" + }, + "mkq": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan" + }, + "mkr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Manep-Barem" + }, + "mks": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Central-Western Baja Mixtec" + }, + "mkt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "mku": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan", + "level10": "Konya-Manya" + }, + "mkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Mafea-Tutuba" + }, + "mkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "Southeastern Kikongo", + "level20": "Southern Kikongo", + "level21": "Koongo-Kituba" + }, + "mkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "North Manobo", + "level5": "Kinamiguin-Bukidnon" + }, + "mky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "East Makian-Gane" + }, + "mkz": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor" + }, + "mla": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "mlb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa" + }, + "mlc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "mld": { + "level0": "Bookkeeping" + }, 
+ "mle": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Manambu-Yalaku" + }, + "mlf": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic", + "level4": "Tin" + }, + "mlh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Kate-Mape" + }, + "mli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "mlj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Miltuic" + }, + "mlk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili" + }, + "mll": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula" + }, + "mlm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Mulam-Kam" + }, + "mln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "North and West Guadalcanal" + }, + "mlo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + 
"level4": "Joola-Manjaku", + "level5": "Jola", + "level6": "FH-Jola", + "level7": "PF-Jola", + "level8": "Kwatay-Karon-Mlomp", + "level9": "Karon-Mlomp" + }, + "mlp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles" + }, + "mlq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Xasonka" + }, + "mlr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Hurza" + }, + "mls": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "mlt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Malta-Tunisian Arabic" + }, + "mlu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Central-Northern Malaita", + "level9": "North Malaitan" + }, + "mlv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mlw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere" + }, + "mlx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula", + "level10": "Southwest Coastal Malekula" + }, + "mly": { + "level0": "Bookkeeping" + }, + "mma": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan", + "level6": "Nigerian Jarawan" + }, + "mmb": { + "level0": "Somahai" + }, + "mmc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Mazahua" + }, + "mmd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui", + "level5": "Maonan-Chadong" + }, + "mme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "mmf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic", + "level6": "Mundat-Karfa" + }, + "mmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "mmh": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak", + "level4": "Waura-Mehinaku-Kustenau", + "level5": "Waura-Mehinaku" + }, + "mmi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + 
"level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor", + "level7": "Nuclear Tibor" + }, + "mmj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "mmk": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "mml": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "mmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Epi-Efate", + "level7": "Epi", + "level8": "Bieria-Maii" + }, + "mmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine" + }, + "mmo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mapos-Mangga-Wagau" + }, + "mmp": { + "level0": "Amto-Musan" + }, + "mmq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "East Sogeram", + "level6": "Aisian" + }, + "mmr": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "North Hmongic" + }, + "mms": { + "level0": "Bookkeeping" + }, + "mmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": 
"North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero" + }, + "mmu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu" + }, + "mmv": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic", + "level6": "Arapaso-Miriti" + }, + "mmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers" + }, + "mmx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Madak linkage" + }, + "mmy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Dangla" + }, + "mmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza 
Fleuve" + }, + "mna": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "mnb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "mnc": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen", + "level2": "Manchu-Xibe" + }, + "mnd": { + "level0": "Tupian", + "level1": "Monde", + "level2": "Gavianic" + }, + "mne": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Bagirmic", + "level6": "Morom-Jaya-Naba", + "level7": "Naba-Berakou" + }, + "mnf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Mundani-Njen" + }, + "mng": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong" + }, + "mnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "mni": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga" + }, + "mnj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + 
"level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Yidgha-Munji" + }, + "mnk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding" + }, + "mnl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "mnm": { + "level0": "Dagan", + "level1": "Central Dagan", + "level2": "Southwest Dagan" + }, + "mnn": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + "level6": "Southern-Central Mnong" + }, + "mnp": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Inland Min" + }, + "mnq": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek" + }, + "mnr": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Western Numic" + }, + "mns": { + "level0": "Uralic", + "level1": "Mansic", + "level2": "North-Central Mansi" + }, + "mnt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic", + "level3": "Nuclear Mayabic" + }, + "mnu": { + "level0": "Mairasic" + }, + "mnv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "mnw": { + "level0": "Austroasiatic", + "level1": 
"Monic" + }, + "mnx": { + "level0": "East Bird's Head" + }, + "mny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu" + }, + "mnz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes" + }, + "moa": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Wan-Mwan" + }, + "mob": { + "level0": "Bookkeeping" + }, + "moc": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom" + }, + "mod": { + "level0": "Pidgin", + "level1": "Choctaw-based pidgin" + }, + "moe": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "mof": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian", + "level6": "Western Southern New England Algonquian" + }, + "mog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Mongondowic" + }, + "moh": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Mohawk-Oneida" + }, + "moi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi" + }, + "moj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic" + }, + "mom": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": 
"Tlapanec-Manguean", + "level3": "Manguean" + }, + "moo": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam" + }, + "mop": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan" + }, + "mor": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Western Heibanic" + }, + "mos": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Mossic" + }, + "mot": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic" + }, + "mou": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "mov": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "mow": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Bobangic", + "level13": "Bobangic Riverain" + }, + "mox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip 
linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage" + }, + "moy": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid", + "level2": "South Gonga" + }, + "moz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B" + }, + "mpa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic" + }, + "mpb": { + "level0": "Northern Daly" + }, + "mpc": { + "level0": "Mangarrayi-Maran" + }, + "mpd": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic" + }, + "mpe": { + "level0": "Surmic" + }, + "mpf": { + "level0": "Bookkeeping" + }, + "mpg": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Marba-Musey" + }, + "mph": { + "level0": "Iwaidjan Proper" + }, + "mpi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 2" + }, + "mpj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Martuwangkic", + "level4": "Warnman-Wangka" + }, + "mpk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic", + "level5": "Musgu-Mbara" + }, + "mpl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "mpm": { 
+ "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic" + }, + "mpn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "mpo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "mpp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Sopac" + }, + "mpq": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matis subgroup" + }, + "mpr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Marovo-Vangunu" + }, + "mps": { + "level0": "Teberan" + }, + "mpt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + 
"level4": "Ok", + "level5": "Mountain Ok", + "level6": "Mianic" + }, + "mpu": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic" + }, + "mpv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "mpw": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan" + }, + "mpx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima" + }, + "mpy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic" + }, + "mpz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Bi-Ka" + }, + "mqa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "mqb": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "Hurza" + }, + "mqd": { + "level0": "Bookkeeping" + }, + "mqe": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "mqf": { + "level0": "Somahai" + }, + "mqg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay" + }, + "mqh": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Tlazoyal-Penoles" + }, + "mqi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Batuley-Mariri" + }, + "mqj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "mqk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "mql": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari", + "level12": "Ditammaric" + }, + "mqm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Marquesan" + }, + "mqn": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki" + }, + "mqo": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River" + }, + "mqp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "West Piru Bay", + "level5": "Hoamoal", + "level6": "West Hoamoal" + }, + "mqq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic", + "level7": "Kadazan-Sugut-Minokok", + "level8": "Sugut-Minokok Kadazan" + }, + "mqr": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "mqs": { + "level0": "North Halmahera" + }, + "mqt": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "mqu": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian" + }, + "mqv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "mqw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "mqx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi" + }, + "mqy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", 
+ "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Manggarai Khusus" + }, + "mqz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "mra": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic" + }, + "mrb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "mrc": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "mrd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kham-Magar-Chepang", + "level4": "Magar" + }, + "mre": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "OKSLic" + }, + "mrg": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Eastern Tani" + }, + "mrh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "mri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian 
Proximal", + "level14": "Maoric" + }, + "mrj": { + "level0": "Uralic", + "level1": "Mari" + }, + "mrk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "mrl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Mortlockese-Trukese" + }, + "mrm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mrn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Santa Isabel", + "level10": "East Santa Isabel" + }, + "mro": { + "level0": "Sino-Tibetan", + "level1": "Mruic" + }, + "mrp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "mrq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Marquesan" + }, + "mrr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Southeast Gondi" + }, + "mrs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "mrt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Bura-Marghi", + "level6": "Marghic" + }, + "mru": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "mrv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + 
"level13": "Far East Polynesian" + }, + "mrw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Danaw" + }, + "mrx": { + "level0": "Tor-Orya", + "level1": "Tor" + }, + "mry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Eastern Mansakan" + }, + "mrz": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Marindic" + }, + "msb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan", + "level7": "Masbate-Sorsogon" + }, + "msc": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan" + }, + "msd": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Meemul-Tziij" + }, + "mse": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Marba-Musey" + }, + "msf": { + "level0": "Nimboranic", + "level1": "Outer Nimboranic" + }, + "msg": { + "level0": "West Bird's Head", + "level1": "South West Bird's Head" + }, + "msh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Western Malagasic" + }, + "msi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "East Borneo Malay", + "level6": "Banjar-Berau-Brunei Malay", + "level7": "Berau-Brunei Malay", + "level8": "Bruneic Malay" + }, + "msj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic" + }, + "msk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Mansakan", + "level5": "Eastern Mansakan" + }, + "msm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "East and Central Manobo", + "level7": "East Manobo" + }, + "msn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mso": { + "level0": "Mombum-Koneraw" + }, + "msp": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic", + "level3": "Unclassified Jurunic" + }, + "msq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac" + }, + "msr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic" + }, + "mss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": 
"Masela-South Babar" + }, + "mst": { + "level0": "Bookkeeping" + }, + "msu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu", + "level10": "Musom-Sirak" + }, + "msv": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 1" + }, + "msw": { + "level0": "Atlantic-Congo" + }, + "msx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan", + "level6": "Anamuxric" + }, + "msy": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni", + "level3": "Mikarewan" + }, + "msz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip", + "level6": "Sopac" + }, + "mta": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "South Manobo", + "level6": "Sarangani-Tasaday-Cotabato" + }, + "mtb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia", + "level9": "Anyinic" + }, + "mtc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Kokon" + }, + "mtd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", 
+ "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang" + }, + "mte": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "mtf": { + "level0": "Lower Sepik", + "level1": "Nor" + }, + "mtg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "mth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + "mti": { + "level0": "Dagan", + "level1": "Central Dagan" + }, + "mtj": { + "level0": "East Bird's Head", + "level1": "Meax" + }, + "mtk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "mtl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic" + }, + "mtm": { + "level0": "Uralic", + "level1": "Samoyedic" + }, + "mtn": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Matagalpan" + }, + "mto": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe" + }, + "mtp": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + "mtq": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong", + "level3": "Muongic" + }, + "mtr": { + "level0": "Indo-European", + 
"level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewaric" + }, + "mts": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "mtt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "mtu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mtv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Molet-Asaroo" + }, + "mtw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Negrosanon" + }, + "mtx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec" + }, + "mty": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic" + }, + "mtz": { + "level0": "Bookkeeping" + }, + "mua": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": 
"Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "mub": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "muc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Yemne-Kimbi" + }, + "mud": { + "level0": "Eskimo-Aleut", + "level1": "Aleutic" + }, + "mue": { + "level0": "Mixed Language", + "level1": "Spanish-Quechua" + }, + "mug": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Musguic", + "level5": "Musgu-Mbara" + }, + "muh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Eastern Mundu-Baka" + }, + "mui": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Central Sumatran Malay", + "level6": "Music" + }, + "muj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit" + }, + "muk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Mustangic" + }, + "mum": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota 
linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota" + }, + "muo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake", + "level9": "Perema-Mumbake" + }, + "mup": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "muq": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "North Hmongic" + }, + "mur": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle" + }, + "mus": { + "level0": "Muskogean" + }, + "mut": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Muria" + }, + "muu": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Transversal Lowland East Cushitic" + }, + "muv": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Muthuvan-Mannan" + }, + "muw": { + "level0": "Bookkeeping" + }, + "mux": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Melpa-Tembagla" + }, + "muy": { + "level0": "Afro-Asiatic", + "level1": 
"Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "muz": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic", + "level4": "Tirma-Chai-Mursi" + }, + "mva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam", + "level10": "Manam-Sepa" + }, + "mvb": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "mvc": { + "level0": "Bookkeeping" + }, + "mvd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese" + }, + "mve": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "mvf": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "mvg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic", + "level8": "Yucuane-Teita" + }, + "mvh": { 
+ "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "mvi": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu" + }, + "mvj": { + "level0": "Bookkeeping" + }, + "mvk": { + "level0": "Yuat" + }, + "mvl": { + "level0": "Pama-Nyungan", + "level1": "Paman" + }, + "mvn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage" + }, + "mvo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Marovo-Vangunu" + }, + "mvp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "mvq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Kumil" + }, + "mvr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "mvt": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula", + "level8": "North Coast Malakula", + "level9": "Botovro-Vovo-Vao" + }, + "mvu": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Maba" + }, + "mvv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal" + }, + "mvw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Unclassified Ruvuma" + }, + "mvx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic" + }, + "mvy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic" + }, + "mvz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group" + }, + "mwa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North 
Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage", + "level9": "Bunama-Mwatebu" + }, + "mwb": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Mandi-Muniwara" + }, + "mwc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Are-Doga" + }, + "mwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Yaoic" + }, + "mwf": { + "level0": "Southern Daly" + }, + "mwg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Arawe", + "level11": "West Arawe" + }, + "mwh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Bibling" + }, + "mwi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": 
"Peripheral Western Malakula", + "level9": "Ninde-Nati" + }, + "mwj": { + "level0": "Bookkeeping" + }, + "mwk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Kita-Kagoro" + }, + "mwl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Asturo-Leonese" + }, + "mwm": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Chari" + }, + "mwn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "mwo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Maewo" + }, + "mwp": { + "level0": "Pama-Nyungan" + }, + "mwq": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "South Peripheral Kuki-Chin", + "level5": "Choic", + "level6": "Daai-Nghmoye-Muun-Kaang" + }, + "mws": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga", + "level11": "Nithi-Meru" + }, + "mwt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Moken-Moklen" + }, + "mwu": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Gberi-Morokodo-Mittu" + }, + "mwv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "mww": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian", + "level7": "First Vernacular Hmong", + "level8": "Far Western Miao" + }, + "mwx": { + "level0": "Bookkeeping" + }, + "mwy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Okiek-Akie" + }, + "mwz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Basoo" + }, + "mxa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Central-Western Baja Mixtec" + }, + "mxb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Tezoatlanic" + }, + "mxc": { + "level0": "Atlantic-Congo", 
+ "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona" + }, + "mxd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Modang-Segai" + }, + "mxe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers", + "level9": "Mele-Futuna" + }, + "mxf": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Septentrional", + "level6": "Kotoko Septentrional 2" + }, + "mxg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "mxh": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe", + "level2": "Mangbutu-Efe", + "level3": "Leseic" + }, + "mxi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Unshifted Western Romance" + }, + "mxj": { + "level0": "Sino-Tibetan", + "level1": "Kman-Meyor" + }, + "mxk": { + "level0": "Bogia" + }, + "mxl": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "mxm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Nakanai-Meramera" + }, + "mxn": { + "level0": "West Bird's Head", + "level1": "Seget-Moi" + }, + "mxo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Eastern Greater Luyana" + }, + "mxp": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe" + }, + "mxq": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Midland Mixe" + }, + "mxr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik" + }, + "mxs": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "mxt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "East Coast Mixtec" + }, + "mxu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + 
"level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "mxv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Coicoyan-Metlatonoc" + }, + "mxw": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu", + "level3": "Namo-Len" + }, + "mxx": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Maninka-Mori" + }, + "mxy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec" + }, + "mxz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "mya": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "myb": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "myd": { + "level0": "Bookkeeping" + }, + "mye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30" + }, + "myf": { + "level0": "Blue Nile Mao" + }, + "myg": 
{ + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "myh": { + "level0": "Wakashan", + "level1": "Southern Wakashan", + "level2": "Makah-Nitinat" + }, + "myi": { + "level0": "Bookkeeping" + }, + "myj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Feroge-Mangaya" + }, + "myk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "myl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Kulawi" + }, + "mym": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic" + }, + "myo": { + "level0": "Ta-Ne-Omotic", + "level1": "Kefoid" + }, + "myq": { + "level0": "Bookkeeping" + }, + "mys": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group", + "level7": "Peripheral Western Gurage" + }, + "myt": { + "level0": "Bookkeeping" + }, + "myu": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Mundurukuic" + }, + "myv": { + "level0": "Uralic", + "level1": "Mordvin" + }, + "myw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Kilivila-Misima", + "level8": "Kilivilic", + "level9": "Kilivila-Muyuw" + }, 
+ "myx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Northern Luyia" + }, + "myy": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western Eastern Tucanoan", + "level3": "Barasano-Eduria-Macuna" + }, + "myz": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Macro-Mandaic" + }, + "mza": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Coast Mixtec", + "level7": "West Coast Mixtec" + }, + "mzb": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "mzc": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Norwegian Sign" + }, + "mzd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Duala-Malimba" + }, + "mze": { + "level0": "Mailuan" + }, + "mzf": { + "level0": "Bookkeeping" + }, + "mzg": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "mzh": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + 
"mzi": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic", + "level7": "Northern Baja Mazatec" + }, + "mzj": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding", + "level9": "Manenkan", + "level10": "Konya-Manya" + }, + "mzk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila" + }, + "mzl": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Lowland Mixe" + }, + "mzm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "mzn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Mazanderani-Shahmirzadi" + }, + "mzo": { + "level0": "Cariban", + "level1": "Kuikuroan", + "level2": "Nuclear Kuikuroan" + }, + "mzq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + 
"level8": "Interior Bungku-Tolaki" + }, + "mzr": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Marubo Subgroup" + }, + "mzs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "mzt": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Menraq-Batek", + "level6": "Batekic" + }, + "mzu": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan" + }, + "mzv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic" + }, + "mzw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic" + }, + "mzx": { + "level0": "Bookkeeping" + }, + "mzy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "mzz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + 
"level8": "Bwaidoga linkage", + "level9": "Iamalelic" + }, + "naa": { + "level0": "Namla-Tofanma" + }, + "nab": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex" + }, + "nac": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Jimi", + "level3": "Kandawo-Narak" + }, + "nad": { + "level0": "Bookkeeping" + }, + "nae": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram", + "level6": "Ulat Inai" + }, + "naf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Rawlinson", + "level5": "Sankwep" + }, + "nag": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Eastern Kamrupa" + }, + "naj": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Naluic" + }, + "nak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Willaumez linkage", + "level7": "Nakanai-Meramera" + }, + "nal": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "nam": { + "level0": "Southern Daly" + }, + "nan": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Min", + "level3": "Coastal Min" + }, + "nao": { + "level0": 
"Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel", + "level9": "Sherpaic" + }, + "nap": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "naq": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Khoekhoe", + "level3": "North Khoekhoe" + }, + "nar": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos" + }, + "nas": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "Central Nasioi" + }, + "nat": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya" + }, + "nau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Kosraean-Nauruan" + }, + "nav": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Apachean", + "level4": "Southwestern Apachean", + "level5": "Western Southwestern Apachean" + }, + "naw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North 
Guang", + "level8": "Oti North Guang", + "level9": "Mountain Oti North Guang" + }, + "nax": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Nimo-Nakwi" + }, + "nay": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Lower Murray", + "level4": "Yaraldi-Keramin-Yitha" + }, + "naz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "North Guerrero Nahuatl" + }, + "nba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "nbb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid" + }, + "nbc": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Chang-Phom-Konyak" + }, + "nbd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Ngendan" + }, + "nbe": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": 
"Southwestern Patkaian", + "level5": "Chang-Phom-Konyak", + "level6": "Konyak-Phom" + }, + "nbf": { + "level0": "Bookkeeping" + }, + "nbg": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "nbh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Nuclear Boleic", + "level8": "Galambu-Bele", + "level9": "Kirfi-Bele", + "level10": "Ngamo-Bele" + }, + "nbi": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Naga Maoic" + }, + "nbj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Eastern Ngumpin", + "level5": "Ngumpit" + }, + "nbk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "nbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Bwaka" + }, + "nbn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut", + "level5": "Koiwai-Irarutu", + "level6": "Irarutic" + }, + "nbo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic", + "level8": "Lubila-Lokaa" + }, + "nbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Northern Bakor", + "level10": "Nnam-Ekajuk" + }, + "nbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani" + }, + "nbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "nbs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "South African Sign" + }, + "nbt": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri", + "level6": "Bangni-Tagin" + }, + "nbu": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nbv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Widikum-Tadkon" + }, + "nbw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "nbx": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Badjiri-Eastern Karnic", + "level3": "Eastern Karnic" + }, + "nby": { + "level0": "Border", + "level1": "Bewani" + }, + "nca": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Ufim-Rawa-Nahu" + }, + "ncb": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Central Nicobar" + }, + "ncc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus" + }, + "ncd": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu", + "level6": "Kulungic" + }, + "ncf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "ncg": { + "level0": "Tsimshian", + "level1": "Nishga-Gitxsan" + }, + "nch": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nci": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "ncj": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nck": { + "level0": "Maningrida", + "level1": "Nakkara-Ndjebbana" + }, + "ncl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "Western Periphery Nahuatl" + }, + "ncm": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "ncn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "nco": { + "level0": "South Bougainville", + "level1": 
"Nasioiic" + }, + "ncq": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern Bru-Katang", + "level5": "Katang" + }, + "ncr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane", + "level9": "Ncane-Cung" + }, + "ncs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "nct": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic", + "level5": "Tarao-Chothe" + }, + "ncu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Chumbuli" + }, + "ncx": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic", + "level9": "Ndasa-Wumbvu" + }, + "ndb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "South Ring", + "level9": "Babungoic" + }, + "ndc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona" + }, + "ndd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Nde-Efutop" + }, + "nde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland", + "level14": "Swatic" + }, + "ndg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matumbic" + }, + "ndh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Nyakyusa-Ndali" + }, + "ndi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake" + }, + "ndj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Kilombero" + }, + "ndk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + 
"level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo", + "level16": "Ndaka-Mbo" + }, + "ndl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Bamweic" + }, + "ndm": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "ndn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Bwamba-Ngondi-Pande-Mbati-Aka" + }, + "ndo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Kwambi-Ndonga" + }, + "ndp": { + "level0": "Central Sudanic", + "level1": "Membi-Mangbutu-Efe" + }, + "ndq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": 
"Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Unclassified Kunene" + }, + "ndr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid" + }, + "nds": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "Greater East Low German" + }, + "ndt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mbaic", + "level6": "Ndunga-Mba-Dongo", + "level7": "Ndunga-Mba" + }, + "ndu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Diic" + }, + "ndv": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Palor-Ndut" + }, + "ndw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Ubangi", + "level11": "Ngiri Riverain Ubangi-Ripuaire", + "level12": "Libinzic", + "level13": "Libinza Fleuve" + }, + "ndx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga" + }, + "ndy": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Nduga-Luto" + }, + 
"ndz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere" + }, + "neb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Dan-Toura", + "level6": "Toura-Goo" + }, + "nec": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Pantar" + }, + "ned": { + "level0": "Bookkeeping" + }, + "nee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac", + "level10": "Kum-Nel-Yua" + }, + "nef": { + "level0": "Pidgin", + "level1": "Assamese-based pidgin" + }, + "neg": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic", + "level3": "Negidalic" + }, + "neh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic" + }, + "nej": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "nek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "nem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa", + "level11": "Hyenghene" + }, + "nen": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Loyalty Islands" + }, + "neo": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "East Hmongic", + "level5": "South Qiandongic Miao" + }, + "neq": { + "level0": "Mixe-Zoque", + "level1": "Mixe", + "level2": "Oaxaca Mixe", + "level3": "Lowland-Midland-South Highland Mixe", + "level4": "Lowland-Midland Mixe", + "level5": "Midland Mixe" + }, + "ner": { + "level0": "Konda-Yahadian" + }, + "nes": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti" + }, + "net": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Engan", + "level3": "Outer Enga" + }, + "neu": { + "level0": "Artificial Language" + }, + "nev": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "new": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Newar" + }, + "nex": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "ney": { + "level0": "Kru", + "level1": "Eastern Kru", + "level2": "Neyo-Dida" + }, + "nez": { + "level0": "Sahaptian" + }, + "nfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": 
"Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Hawu-Dhao" + }, + "nfd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau" + }, + "nfg": { + "level0": "Bookkeeping" + }, + "nfk": { + "level0": "Bookkeeping" + }, + "nfl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz" + }, + "nfr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "nfu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe", + "level9": "Mfumteic", + "level10": "Central-Southern Mfumte" + }, + "nga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka" + }, + "ngb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "ngc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ngombe-Genja" + }, + "ngd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Mokiba-Ngando" + }, + "nge": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic", + "level10": "Mankonic" + }, + "ngg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Oriental", + "level5": "Gbanu-Manza-Ngbaka", + "level6": "Manza-Ngbaka", + "level7": "Manzaic", + "level8": "Ngbaka-Manza-Ali" + }, + "ngh": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Ghaap-Kalahari" + }, + "ngi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade" + }, + "ngj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Ngie-Oshie" + }, + "ngk": { + "level0": "Gunwinyguan" + }, + "ngl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe", + "level9": "Lomweic" + }, + "ngm": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "ngn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow 
Grassfields", + "level7": "Momo" + }, + "ngp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Zigua-Nguu" + }, + "ngq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Western Serengeti" + }, + "ngr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz" + }, + "ngs": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Dghwedeic" + }, + "ngt": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic" + }, + "ngu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "ngv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan" + }, + "ngw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic" + }, + "ngx": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": 
"Bura-Marghi", + "level6": "Buraic" + }, + "ngy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Bafia (A.50)" + }, + "ngz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Ngungwel-Eboo" + }, + "nha": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda" + }, + "nhb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Ben-Gban", + "level5": "Bengic" + }, + "nhc": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhd": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani" + }, + "nhe": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nhf": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda" + }, + "nhg": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": 
"Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nhh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Halbic" + }, + "nhi": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl" + }, + "nhj": { + "level0": "Bookkeeping" + }, + "nhk": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhm": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl" + }, + "nhn": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Tlaxcala-Puebla-Pastoral Nahuatl" + }, + "nho": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nhp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", 
+ "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhq": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "nhr": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + "level4": "Naro-Ana" + }, + "nhs": { + "level0": "Bookkeeping" + }, + "nht": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nhu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Beboid", + "level6": "Eastern Beboid", + "level7": "Nsari-Nooni-Ncane", + "level8": "Nooni-Ncane" + }, + "nhv": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl" + }, + "nhw": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Huasteca Nahuatl" + }, + "nhx": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl", + "level6": "Isthmus Nahuatl" + }, + "nhy": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Southeast Puebla-Northern Oaxaca Nahuatl" + }, + "nhz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": 
"Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Tlaxcala-Puebla-Pastoral Nahuatl" + }, + "nia": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Central Barrier Islands" + }, + "nib": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "nid": { + "level0": "Gunwinyguan", + "level1": "Eastern Gunwinyguan" + }, + "nie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": "Bua-Lua" + }, + "nif": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana", + "level5": "Nek-Nuk" + }, + "nig": { + "level0": "Gunwinyguan", + "level1": "Jala" + }, + "nih": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika", + "level12": "Mbozi-Malawi Nyika" + }, + "nii": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "nij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "South West Greater Barito" + }, + "nik": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Central Nicobar" + }, + "nil": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + 
"level4": "Teun-Nila-Serua", + "level5": "Nila-Serua" + }, + "nim": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "nin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic", + "level6": "Mada-Ninzam" + }, + "nio": { + "level0": "Uralic", + "level1": "Samoyedic" + }, + "niq": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "nir": { + "level0": "Nimboranic" + }, + "nis": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Sauk-Nimi" + }, + "nit": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Kolami-Naiki" + }, + "niu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Tongic" + }, + "niv": { + "level0": "Nivkh" + }, + "niw": { + "level0": "Left May", + "level1": "Western Left May", + "level2": "Nimo-Nakwi" + }, + "nix": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "niy": { + "level0": "Central Sudanic", + "level1": "Lenduic" + }, + "niz": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central 
Torricelli", + "level3": "Wapeic", + "level4": "Ningil-Yil" + }, + "nja": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Nzanyic" + }, + "njb": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Noctean" + }, + "njd": { + "level0": "Bookkeeping" + }, + "njh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "nji": { + "level0": "Mirndi", + "level1": "Ngurlun" + }, + "njj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Mundani-Njen" + }, + "njl": { + "level0": "Dajuic", + "level1": "Western Dajuic" + }, + "njm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Angami-Chokri" + }, + "njn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic" + }, + "njo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "njr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "njs": { + "level0": "Geelvink Bay" + }, + "njt": { + "level0": "Pidgin", + "level1": "Trio-based pidgin" + }, + "nju": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Mirning" + }, + "njx": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kamba-Kunyi" + }, + "njy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Njemic" + }, + "njz": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri" + }, + "nka": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban" + }, + "nkb": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Maringic" + }, + "nkc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Lundu-Balong (A.10)", + "level8": "Greater Manenguba" + }, + "nkd": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic" + }, + "nke": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western 
Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia" + }, + "nkf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nkg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Gira-Neko-Nekgini" + }, + "nkh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao" + }, + "nki": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nkj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Tangko-Nakai" + }, + "nkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "nkm": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "nkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Chokwe-Ngangela-Nyemba (K.20)", + "level11": "Ngangela-Nyemba" + }, + "nko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Nkonya-Nkami" + }, + "nkp": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "nkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Nkonya-Nkami" + }, + "nkr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Carolinean Outlier Polynesian" + }, + "nks": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat" + }, + "nkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika" + }, + "nku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Kulango-Lorom", + "level5": "Kulango" + }, + "nkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika" + }, + "nkw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye", + "level12": "Songomenic" + }, + "nkx": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio" + }, + "nky": { + "level0": "Bookkeeping" + }, + "nkz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Ibuoroic", + "level8": "Ibuoro-ItuMbuso-Nkari" + }, + "nla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "nlc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "nld": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "nle": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia", + "level14": "Kabarasi-Tachoni-Nyala East" + }, + "nlg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Nuclear Guadalcanal-Nggelic", + "level7": "Nggelic" + }, + "nli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic", + "level5": "Shumashtic" + }, + "nlj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Vanuma-Nyali" + }, + "nlk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "nlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Indus Kohistanic", + "level9": "Outer Indus Kohistani", + "level10": "Bateri-Mankiyali" + }, + "nlo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu" + }, + "nlq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian" + }, + "nlr": { + "level0": "Bookkeeping" + }, + "nlu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti 
North Guang", + "level9": "River Oti North Guang", + "level10": "Nchumbulu-Dwang" + }, + "nlv": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl" + }, + "nlw": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama" + }, + "nlx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Pauri-Nahali" + }, + "nly": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Northern Ngayarda" + }, + "nlz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz", + "level6": "Natugu-Nalogo" + }, + "nma": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic" + }, + "nmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Northwestern Malakula" + }, + "nmc": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone-Chari", + "level7": "Sido" + }, + "nmd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": 
"Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic", + "level20": "Lekaningic" + }, + "nme": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic", + "level4": "Mzieme-Zeme" + }, + "nmf": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Tangkhulic", + "level4": "Nuclear Tangkhulic" + }, + "nmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Mvumboic", + "level11": "Kwasio-Gyele" + }, + "nmh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Moyon-Monsang Naga" + }, + "nmi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Boleic", + "level7": "Unclassified Boleic" + }, + "nmj": { + "level0": "Bookkeeping" + }, + "nmk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu" + }, + "nml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields" + }, + "nmm": { + "level0": "Sino-Tibetan", + 
"level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Manangba-Nar-Phu" + }, + "nmn": { + "level0": "Tuu", + "level1": "Hua", + "level2": "Taa" + }, + "nmo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Moyon-Monsang Naga" + }, + "nmp": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan" + }, + "nmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Kalanga-Nambya" + }, + "nmr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Unclassified Samba-Duru" + }, + "nms": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "nmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian" + }, + "nmu": { + "level0": "Maiduan" + }, + "nmv": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Pirlatapa-Dieric", + 
"level5": "Dieric" + }, + "nmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Nimoa-Sudest" + }, + "nmx": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu", + "level3": "Nama-Dre" + }, + "nmy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic" + }, + "nmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Yom-Nawdm" + }, + "nna": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Marrngu" + }, + "nnb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Rwenzori" + }, + "nnc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 1" + }, + "nnd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Ambae" + }, + "nne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": 
"Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Ndonga (R.20)", + "level12": "Unclassified Ndonga (R.20)" + }, + "nnf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": "Unclassified Gusap-Mot" + }, + "nng": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Tangkhul-Maring", + "level3": "Maringic" + }, + "nnh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "nni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Patakai" + }, + "nnj": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "nnk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna" + }, + "nnl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Pochuri-Northern Rengma" + }, + "nnm": { + "level0": "Sepik", + "level1": "Yellow River" + }, + "nnn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa", + "level4": "Peveic", + "level5": "Hede-Ngide" + }, + "nnp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Wanchoic" + }, + "nnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Matengic", + "level10": "Ndendeule-Ngindo" + }, + "nnr": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Southern Thura-Yura" + }, + "nnt": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Nanticoke-Conoy" + }, + "nnu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Guang", + "level7": "North Guang", + "level8": "Oti North Guang", + "level9": "River Oti North Guang", + "level10": "Nchumbulu-Dwang" + }, + "nnv": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "nnw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem", + "level9": "Nuni" + }, + "nnx": { + "level0": "Bookkeeping" + }, + "nny": { + "level0": "Tangkic", + "level1": "Southern Tangkic", + "level2": "Kayardild-Yangkaal" + }, + "nnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "East Bamileke" + }, + "noa": { + "level0": "Chocoan" + }, + "noc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana", + "level5": "Nek-Nuk" + }, 
+ "nod": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Yuanic" + }, + "noe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "nof": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Chuave-Nomane" + }, + "nog": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Southeast Kipchak", + "level5": "South Kipchak" + }, + "noh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Dallman" + }, + "noi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Vasave-Noiri" + }, + "noj": { + "level0": "Huitotoan", + "level1": "Nonuya-Ocaina" + }, + "nok": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "nom": { + "level0": "Bookkeeping" + }, + "non": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian" + }, + "noo": { + "level0": "Bookkeeping" + }, + "nop": { + "level0": "Nuclear Trans New Guinea", + "level1": 
"Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Boana" + }, + "noq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Nsong-Mpiin-Ngong" + }, + "nor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "West Scandinavian" + }, + "nos": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu", + "level10": "Northern Nisu" + }, + "not": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran" + }, + "nou": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Coastal Binanderean" + }, + "nov": { + "level0": "Artificial Language" + }, + "now": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "South Rutara" + }, + "noy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua", + "level6": 
"Unclassified Riverine Bua" + }, + "noz": { + "level0": "Dizoid" + }, + "npa": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Manangba-Nar-Phu" + }, + "npb": { + "level0": "Bookkeeping" + }, + "npg": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian", + "level5": "Lainongic", + "level6": "Khiamniungic" + }, + "nph": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Chang-Phom-Konyak", + "level6": "Konyak-Phom" + }, + "npi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Indo-Aryan Northern zone", + "level8": "Eastern Pahari" + }, + "npl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Central Nahuatl", + "level6": "Tlaxcala-Southeastern Puebla Nahuatl", + "level7": "Southeast Puebla-Northern Oaxaca Nahuatl" + }, + "npn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II" + }, + "npo": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Pochuri-Northern Rengma" + }, + "nps": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Western Mek" + }, + "npu": { + "level0": "Bookkeeping" + }, + "npy": { + 
"level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Badaic-Limola", + "level5": "Badaic" + }, + "nqg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede" + }, + "nqk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede" + }, + "nql": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "nqm": { + "level0": "Kolopom" + }, + "nqn": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Nambu" + }, + "nqo": { + "level0": "Artificial Language" + }, + "nqq": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southwestern Patkaian", + "level5": "Wanchoic" + }, + "nqt": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Kofyar-Mushere-Chip", + "level7": "Kofyaric" + }, + "nqy": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nra": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Akeleic" + }, + "nrc": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic", + "level6": "Unclassified Continental Transalpine Celtic" + }, + "nre": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Rengma-Simi" + }, + "nrg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "South-Central Santo" + }, + "nri": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Angami-Chokri" + }, + "nrk": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Northern Ngayarda" + }, + "nrl": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Ngarluma-Kariyarra" + }, + "nrm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram B" + }, + "nrp": { + "level0": "Unclassifiable" + }, + "nrr": { + "level0": "Bookkeeping" + }, + "nrt": { + "level0": "Kalapuyan" + }, + "nru": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic", + "level4": "Naish" + }, + "nrx": { + "level0": "Unattested", + "level1": "Umbugarla (Unattested)" + }, + "nrz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": 
"Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "nsa": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga" + }, + "nsb": { + "level0": "Tuu", + "level1": "Hua" + }, + "nsc": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "nsd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu" + }, + "nse": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi" + }, + "nsf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu" + }, + "nsg": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa" + }, + "nsh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Momo", + "level8": "Ngie-Oshie" + }, + "nsi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "American Sign" + }, + "nsk": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Cree-Montagnais-Naskapi" + }, + "nsl": { + "level0": "Sign Language", + "level1": "L1 Sign 
Language", + "level2": "LSFic", + "level3": "West Scandinavian Sign", + "level4": "Norwegian Sign" + }, + "nsm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Rengma-Simi" + }, + "nsn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic" + }, + "nso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho", + "level12": "Sepedic" + }, + "nsp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign" + }, + "nsq": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "nsr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic" + }, + "nss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "nst": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Tangsa" + }, + "nsu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + 
"level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Sierra de Puebla Nahuatl" + }, + "nsv": { + "level0": "Bookkeeping" + }, + "nsw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Central Santo" + }, + "nsx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "nsy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "nsz": { + "level0": "Maiduan" + }, + "ntd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Sumambu-Tagal", + "level9": "Tidung-Bulusu", + "level10": "Tidung" + }, + "nte": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Koti-Nathembo" + }, + "nti": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "ntj": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "ntk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Western Serengeti", + "level13": "Southeast Mara" + }, + "ntm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari" + }, + "nto": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Bolia-Ntomba" + }, + "ntp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan" + }, + "ntr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Eastern Grusi", + "level9": "Tem-Chala", + "level10": "Bago-Delo-Cala", + "level11": "Delo-Cala" + }, + "nts": { + "level0": "Bookkeeping" + }, + "ntu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Reefs-Santa Cruz", + "level6": "Natugu-Nalogo" + }, + "ntw": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Tuscarora-Nottoway" + }, + "ntx": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nty": { + "level0": "Sino-Tibetan", + "level1": 
"Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Mondzish", + "level4": "Nuclear Mondzish", + "level5": "Munji-Mantsi" + }, + "ntz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Kashanic" + }, + "nua": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Kum-Nel-Yua-Cac", + "level10": "Kum-Nel-Yua" + }, + "nuc": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "nud": { + "level0": "Ndu" + }, + "nue": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic", + "level9": "Mid-Southern Central Core Bandaic" + }, + "nuf": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Nusoish" + }, + "nug": { + "level0": "Mirndi", + "level1": "Yirram" + }, + "nuh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mbongno-Mvano", + "level11": "Mvano-Ndunda" + }, + "nui": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Yasa-Kombe" + }, + "nuj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Western Luyia", + "level14": "Saamiaic" + }, + "nuk": { + "level0": "Wakashan", + "level1": "Southern Wakashan" + }, + "nul": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Elpaputi" + }, + "num": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "nun": { + "level0": "Sino-Tibetan", + "level1": "Nungish", + "level2": "Gunong" + }, + "nuo": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong", + "level3": "Muongic" + }, + "nup": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "nuq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": 
"Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nur": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian", + "level12": "Takuuic" + }, + "nus": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Dinka-Nuer", + "level3": "Nuer-Reel" + }, + "nut": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "nuu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "nuv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem", + "level9": "Nuni" + }, + "nuw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Yapesic" + }, + "nux": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mehek-Pahi" + }, + "nuy": { + "level0": "Gunwinyguan", + "level1": "Eastern Gunwinyguan", + "level2": "Wubuy-Anindilyakwa" + }, + 
"nuz": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Western Nahuatl", + "level5": "Western Periphery-North Guerrero Nahuatl", + "level6": "North Guerrero Nahuatl" + }, + "nvh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "nvm": { + "level0": "Koiarian", + "level1": "Baraic", + "level2": "Barai-Namiae" + }, + "nvo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "nwa": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Arapahoic" + }, + "nwb": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "nwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "nwi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Southern Tanna" + }, + "nwm": { + "level0": "Central Sudanic", + "level1": 
"Sara-Bongo-Bagirmi", + "level2": "Baka-Beli", + "level3": "Morokodo-Beli", + "level4": "Lori" + }, + "nwo": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Unclassified Core Thura-Yura" + }, + "nwr": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "nww": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic" + }, + "nxa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "nxd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic", + "level11": "Lomongo" + }, + "nxe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Nage-Keo" + }, + "nxg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Ngada" + }, + "nxi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Rufijic", + "level9": "Unclassified Rufijic" + }, + "nxj": { + "level0": 
"Bookkeeping" + }, + "nxk": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "nxl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Patakai" + }, + "nxm": { + "level0": "Unclassifiable" + }, + "nxn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic", + "level3": "Nuclear Mayabic" + }, + "nxo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Sake-Ndambomo" + }, + "nxq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic", + "level4": "Naish" + }, + "nxr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok" + }, + "nxu": { + "level0": "Bookkeeping" + }, + "nxx": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic", + "level2": "Sentani-Nafri" + }, + "nya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Nyanjaic" + }, + "nyb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo", + "level5": "Nyangbo-Tafi" + }, + "nyc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": 
"Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Ngbele-Ngenda", + "level15": "Extreme North Vestigial Suffixes Bantu" + }, + "nyd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia", + "level10": "Luyia", + "level11": "Saamia-Wanga-Bukusu", + "level12": "Saamia-Wanga", + "level13": "Central-Eastern Luyia" + }, + "nye": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Simaaic" + }, + "nyf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Northern Mijikenda" + }, + "nyg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu", + "level12": "Fuliiric" + }, + "nyh": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Nyikinic" + }, + "nyi": { + "level0": "Nyimang" + }, + "nyj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern 
Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega" + }, + "nyk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia", + "level11": "Nyaneka-Nkhumbi" + }, + "nyl": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Kuy-Souei" + }, + "nym": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "nyn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nkore-Kiga" + }, + "nyo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nyoro-Tooro" + }, + "nyp": { + "level0": "Kuliak", + "level1": "Ngangea-So" + }, + "nyq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": 
"Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Yazdi-Kermani-Nayini" + }, + "nyr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Nyika-Lambya", + "level10": "Nyika", + "level11": "Central and Southern Nyika", + "level12": "Mbozi-Malawi Nyika" + }, + "nys": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan" + }, + "nyt": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "nyu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "nyv": { + "level0": "Nyulnyulan", + "level1": "Western Nyulnyulan", + "level2": "Nyulnyulic" + }, + "nyx": { + "level0": "Pama-Nyungan", + "level1": "Macleay-New England" + }, + "nyy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Nyakyusa-Ndali" + }, + "nza": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Jukunoid", + "level4": "Central Jukunoid", + "level5": "Jukun-Mbembe-Wurbo" + }, + "nzb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + 
"level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi A" + }, + "nzd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic" + }, + "nzi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Southern Bia", + "level9": "Jwira-Nzima" + }, + "nzk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "nzm": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Zemeic", + "level3": "Nuclear Zemeic", + "level4": "Mzieme-Zeme" + }, + "nzr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic", + "level9": "Zulic" + }, + "nzs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL" + }, + "nzy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic" + }, + "nzz": { + "level0": "Dogon", + "level1": "Nangan Dogon" + }, + "oaa": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic", + "level2": "Ulchaic" + }, + "oac": { + "level0": "Tungusic", 
+ "level1": "Northeastern Tungusic", + "level2": "Central-Eastern Tungusic", + "level3": "Oroch-Udihe" + }, + "oar": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic" + }, + "obi": { + "level0": "Chumashan" + }, + "obl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Unclassified Cameroun-Ubangian" + }, + "obo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Manobo", + "level4": "Central and Southern Manobo", + "level5": "East-West-Central Manobo", + "level6": "West Manobo" + }, + "obr": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "obu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "oca": { + "level0": "Huitotoan", + "level1": "Nonuya-Ocaina" + }, + "occ": { + "level0": "Bookkeeping" + }, + "och": { + "level0": "Sino-Tibetan", + "level1": "Sinitic" + }, + "oci": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "Occitanic" + }, + "oco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Southwestern Brythonic" + }, + "ocu": { + "level0": 
"Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Matlatzincan" + }, + "oda": { + "level0": "Bookkeeping" + }, + "odk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Western Rajasthani", + "level11": "Indus Rajasthani" + }, + "odt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch" + }, + "odu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Abua-Odual" + }, + "ofo": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan", + "level2": "Southeastern Siouan" + }, + "ofs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian" + }, + "ofu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Ekoid-Mbe", + "level6": "Ekoid", + "level7": "Bakor-Ejagham", + "level8": "Bakor", + "level9": "Nde-Efutop" + }, + "ogb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Kugboic" + }, + "ogc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid" + }, + "oge": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", 
+ "level2": "Georgic" + }, + "ogg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta", + "level5": "Unclassified Central Delta" + }, + "ogo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid", + "level6": "Tai-Kana" + }, + "ogu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Central Delta" + }, + "ohu": { + "level0": "Uralic", + "level1": "Hungaric" + }, + "oia": { + "level0": "Timor-Alor-Pantar", + "level1": "East Timor", + "level2": "Fataluku-Oirata" + }, + "oie": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Lotuxo", + "level5": "Lopit-Dongotono", + "level6": "Dongotonic" + }, + "oin": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "ojb": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Northwestern-Saulteaux Ojibwa" + }, + "ojc": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ojg": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Central-Eastern-Southwestern Ojibwa" + }, + "ojp": { + "level0": "Japonic", + "level1": "Japanesic" + }, + "ojs": { + 
"level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Severn-Algonquin" + }, + "ojv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "Central Northern Outlier Polynesian" + }, + "ojw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi", + "level5": "Ojibwa", + "level6": "Nuclear Ojibwe", + "level7": "Northwestern-Saulteaux Ojibwa" + }, + "oka": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic" + }, + "okb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross" + }, + "okd": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo", + "level4": "Biseni-Okordia" + }, + "oke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "okg": { + "level0": "Bookkeeping" + }, + "okh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": 
"Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "oki": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Okiek-Akie" + }, + "okj": { + "level0": "Great Andamanese", + "level1": "Middle Great Andamanese" + }, + "okk": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One" + }, + "okl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "OKSLic" + }, + "okn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Okinoerabu-Tokunoshima" + }, + "okr": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Eastern Ijo", + "level3": "Nikio", + "level4": "Kio Ijo" + }, + "oks": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo" + }, + "oku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "Center Ring", + "level10": "Komic" + }, + "okv": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "Nuclear Binanderean", + "level4": "South Binanderean", + "level5": "Orokaivic" + }, + "okx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid", + "level7": "Okpe-Akuku-Idesa" + }, + "ola": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic" + }, + "old": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "ole": { + "level0": "Sino-Tibetan" + }, + "olk": { + "level0": "Bookkeeping" + }, + "olm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid" + }, + "olo": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "olu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "oma": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha" + }, + "omb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Ambae" + }, + "ome": { + "level0": "Bookkeeping" + }, + "omg": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III", + "level7": "Omagua-Kokama" + }, + "omi": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Central Moru-Madi", + "level3": "Kalikoic" + }, + "omk": { + "level0": "Yukaghir" + }, + "oml": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", 
+ "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic" + }, + "omn": { + "level0": "Unclassifiable" + }, + "omo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan" + }, + "omr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi" + }, + "omt": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Tatoga-Omotik" + }, + "omw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "omx": { + "level0": "Austroasiatic", + "level1": "Monic" + }, + "ona": { + "level0": "Chonan", + "level1": "Insular Chonan" + }, + "onb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Beic", + "level4": "Lingao" + }, + "one": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Mohawk-Oneida" + }, + "ong": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Au-Olo-Elkei", + "level5": "Olo-Elkei" + }, + "oni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "onj": { + "level0": "Dagan" + }, + "onk": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "onn": { + "level0": "Bosavi", + 
"level1": "Bosavi Watershed" + }, + "ono": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "onp": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Sartang-Sherdukpen" + }, + "onr": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One", + "level3": "Central-Northern One" + }, + "ons": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Kalasa" + }, + "ont": { + "level0": "Bookkeeping" + }, + "onu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Unua-Pangkumu" + }, + "onw": { + "level0": "Nubian", + "level1": "Nile Nubian", + "level2": "Nobiin Nubian" + }, + "onx": { + "level0": "Pidgin", + "level1": "Onin-based pidgin" + }, + "ood": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Piman" + }, + "oog": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "oon": { + "level0": "Jarawa-Onge" + }, + "oor": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch", + "level9": "Afrikaansic" + }, + "oos": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Ossetic" + }, + "opa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Southern Northwestern Edoid" + }, + "ope": { + "level0": "Bookkeeping" + }, + "opk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Kwer-Kopkaka-Burumakok" + }, + "opm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin" + }, + "opo": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "opt": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Opata-Eudeve" + }, + "opy": { + "level0": "Nuclear-Macro-Je" + }, + "ora": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Malaita", + "level8": "Southern Malaita" + }, + "orc": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "Central Oromo" + }, + "ore": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan" + }, + "org": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Koring-Kukele" + }, + "orh": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Northern Tungusic" + }, + "ork": { + "level0": "Bookkeeping" + }, + "orn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + 
"level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "oro": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "orr": { + "level0": "Ijoid", + "level1": "Ijo", + "level2": "Western Ijo", + "level3": "Inland Ijo" + }, + "ors": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "ort": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "oru": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Ormuri-Parachi" + }, + "orv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "orw": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Waric", + "level3": "Wanham-Wari-Oro Win", + "level4": "Wari-Oro Win" + }, + "orx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Ebughu-Oro" + }, + "ory": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "orz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + 
"level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay" + }, + "osa": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha", + "level3": "Osage-Kansa" + }, + "osc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "osi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese" + }, + "oso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic" + }, + "osp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "oss": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Ossetic", + "level7": "Modern Ossetic" + }, + "ost": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Southwest Grassfields" + }, + "osu": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei", + "level2": "One" + }, + "osx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch" + }, + "otd": { + "level0": "Austronesian", + 
"level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "ote": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Northwestern Otomi" + }, + "otk": { + "level0": "Bookkeeping" + }, + "otl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Southern Otomi" + }, + "otm": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "otn": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "otq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Northwestern Otomi" + }, + "otr": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Central Heibanic", + "level3": "Ebang-Logol" + }, + "ots": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Southwestern Otomi" + }, + "ott": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Southwestern Otomi" + }, + "otu": { + "level0": "Bororoan", + "level1": "Bororo-Otuke" + }, + "otw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": 
"Ojibwa-Potawatomi", + "level5": "Ojibwa" + }, + "otx": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Eastern Otomi" + }, + "oty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid" + }, + "otz": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Otomian", + "level5": "Otomi", + "level6": "Southern Otomi" + }, + "oua": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber", + "level5": "Ouargli-Oued Righ" + }, + "oub": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn" + }, + "oue": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "South-Central Nasioi", + "level4": "Central Nasioi" + }, + "oui": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "oum": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic" + }, + "oun": { + "level0": "Bookkeeping" + }, + "owi": { + "level0": "Left May" + }, + "owl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Brythonic", + "level7": "Old-Modern 
Welsh" + }, + "oyb": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "oyd": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "oym": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon" + }, + "oyy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "ozm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Njemic" + }, + "pab": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray", + "level4": "Parecis-Nawe" + }, + "pac": { + "level0": "Austroasiatic", + "level1": "Katuic" + }, + "pad": { + "level0": "Arawan" + }, + "pae": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Middle Bomokandian", + "level15": "Late Bomokandian", + "level16": "Pagabeteic" + }, + "paf": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": 
"Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Unclassified Kawahiva" + }, + "pag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Southern Cordilleran", + "level6": "West Southern Cordilleran" + }, + "pah": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva" + }, + "pai": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe", + "level6": "Tarok-Pe" + }, + "paj": { + "level0": "Bookkeeping" + }, + "pak": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.A" + }, + "pal": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian" + }, + "pam": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon" + }, + "pan": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Eastern Panjabic" + }, + "pao": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Western Numic" + }, + "pap": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "paq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "par": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "pas": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "pau": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "pav": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Waric", + "level3": "Wanham-Wari-Oro Win", + "level4": "Wari-Oro Win" + }, + "paw": { + "level0": "Caddoan", + "level1": "Northern Caddoan", + "level2": "Pawnee-Kitsai", + "level3": "Pawnee-Arikara" + }, + "pax": { + "level0": "Unattested" + }, + "pay": { + "level0": "Chibchan" + }, + "pbc": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare", + "level3": "Pemongan", + "level4": "Kapong" + }, + "pbe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + 
}, + "pbf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan" + }, + "pbg": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Guajiro-Paraujano" + }, + "pbh": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Pemong-Panare" + }, + "pbi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mandaraic", + "level6": "Podoko" + }, + "pbl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Northern Bikwin-Jen", + "level6": "Mak-Tal" + }, + "pbm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Northwest Alta Mazatec" + }, + "pbn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "pbo": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Central Atlantic", + "level3": "Bak", + "level4": "Joola-Manjaku", + "level5": "Manjaku-Mankanya-Pepel", + "level6": "Cur-Bok-Cotier" + }, + "pbp": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Jaad" + }, + "pbr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands 
Bantu", + "level9": "Kisi-Pangwa" + }, + "pbs": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pbt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "pbu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "pbv": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Khasian", + "level3": "Khasi-Pnar-Lyngngam", + "level4": "Khasi-Pnar" + }, + "pbz": { + "level0": "Bookkeeping" + }, + "pca": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + }, + "pcb": { + "level0": "Austroasiatic", + "level1": "Pearic" + }, + "pcc": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai" + }, + "pcd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil" + }, + "pce": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Palaung" + }, + "pcf": { + "level0": "Dravidian", + "level1": "South 
Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Tamil-Paliyan" + }, + "pcg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Ravulic" + }, + "pch": { + "level0": "Unattested", + "level1": "Dravidian (Unattested)" + }, + "pci": { + "level0": "Dravidian", + "level1": "Central Dravidian", + "level2": "Parji-Ollari-Gadaba" + }, + "pcj": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum" + }, + "pck": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "pcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "pcm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio", + "level14": "Nigeria-Cameroon Creole English" + }, + "pcn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": 
"Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Piti-Atsam" + }, + "pcp": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Bolivian Nawa" + }, + "pcr": { + "level0": "Bookkeeping" + }, + "pcw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic", + "level8": "Piapung-Koenoem" + }, + "pda": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan", + "level5": "Pomoikan" + }, + "pdc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian", + "level9": "Palatinate" + }, + "pdi": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "pdn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Anus-Podena" + }, + "pdo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western 
Bungku-Tolaki", + "level8": "Interior Bungku-Tolaki" + }, + "pdt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "Greater East Low German" + }, + "pdu": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Northern Karen" + }, + "pea": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Betawic" + }, + "peb": { + "level0": "Pomoan", + "level1": "Russian River and Eastern" + }, + "pec": { + "level0": "Bookkeeping" + }, + "ped": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "pee": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "pef": { + "level0": "Pomoan" + }, + "peg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Konda-Kui", + "level4": "Manda-Kui", + "level5": "Manda-Pengo" + }, + "peh": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "pei": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean" + }, + "pej": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Northern-Central Pomoan" + }, + "pek": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "South-East Admiralty" + }, + "pel": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "pem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", + "level10": "Holu (K.10)", + "level11": "Pheende-Kwezo" + }, + "peo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian" + }, + "pep": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda", + "level3": "Eastern Tonda" + }, + "peq": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Southern Pomoan-Kashaya" + }, + "pes": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic" + }, + "pev": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "pex": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + 
"level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka" + }, + "pey": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Global Dutch" + }, + "pez": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan" + }, + "pfa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian", + "level13": "Murilo-Fanapanges" + }, + "pfe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru" + }, + "pfl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "West Middle German", + "level8": "Rhenish Franconian", + "level9": "Palatinate" + }, + "pga": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic", + "level9": "East Sudanic Arabic" + }, + "pgd": { + "level0": "Indo-European", + 
"level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic" + }, + "pgg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + "level8": "Chamealic" + }, + "pgi": { + "level0": "Border", + "level1": "Bewani", + "level2": "Pagi-Kilmeri" + }, + "pgk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Unua-Pangkumu" + }, + "pgs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "pgu": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Kao River", + "level4": "Paguic" + }, + "pgy": { + "level0": "Bookkeeping" + }, + "pgz": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "pha": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Paheng-Younuo", + "level3": "Paheng" + }, + "phd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi", + "level11": "Western Marathi" + 
}, + "phg": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Katu" + }, + "phh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha" + }, + "phj": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Newar" + }, + "phk": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Mogaung", + "level12": "Assam Tai A" + }, + "phl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic", + "level9": "Dangari" + }, + "phm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "phn": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician", + "level7": "Phoenician-Punic" + }, + "pho": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Phunoi-Coong" + }, + "phq": { + "level0": 
"Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic" + }, + "phr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Paharic" + }, + "pht": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Siamese" + }, + "phu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Siamese" + }, + "phv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "phw": { + "level0": "Bookkeeping" + }, + "pia": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Piman" + }, + "pib": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Purus-Chamicuro", + "level3": "Purus", + "level4": "Yineic", + "level5": "Western Yineic" + }, + "pic": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu 
A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani", + "level10": "Himba-Pinji" + }, + "pid": { + "level0": "Saliban", + "level1": "Maco-Piaroa" + }, + "pie": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro" + }, + "pif": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "pig": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "pih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English" + }, + "pij": { + "level0": "Unclassifiable" + }, + "pil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Yom-Nawdm" + }, + "pim": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian" + }, + "pin": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill", + "level3": "Hewa-April River" + }, + "pio": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Piapoco-Achagua" + }, + "pip": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic 
A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Peroic" + }, + "pir": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Kotiria-Piratapuyo", + "level5": "Piratapuyic" + }, + "pis": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "pit": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Pitta-Pitta" + }, + "piu": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic" + }, + "piv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "piw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika" + }, + "pix": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "piy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West 
Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Peroic" + }, + "piz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap", + "level10": "Nmi-Fij-Fwa" + }, + "pjt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra", + "level6": "Tjarra" + }, + "pkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian" + }, + "pkc": { + "level0": "Unclassifiable" + }, + "pkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus" + }, + "pkh": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin" + }, + "pkn": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic" + }, + "pko": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Northern Kalenjin" + }, + "pkp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic" + }, + "pkr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga", + "level8": "Muduga-Palu" + }, + "pks": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Indo-Pakistani-Nepalese Sign", + "level3": "Indo-Pakistani Sign" + }, + "pkt": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "pku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Maanyan-Paku" + }, + "pla": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "plb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "plc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan" + }, + "pld": { + "level0": "Unclassifiable" + }, + "ple": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe" + }, + "plg": { + "level0": 
"Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom", + "level3": "Pilaga-Toba" + }, + "plh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua" + }, + "pli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari" + }, + "plk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Kohistanic Shina" + }, + "pll": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Palaung" + }, + "pln": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "plo": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "plp": { + "level0": "Bookkeeping" + }, + "plq": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic" + }, + "plr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "pls": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", 
+ "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan" + }, + "plt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Central-Eastern Malagasic" + }, + "plu": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran" + }, + "plv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan", + "level7": "Brooke-Canipaan Palawan" + }, + "plw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan", + "level6": "Nuclear Palawan", + "level7": "Brooke-Canipaan Palawan" + }, + "ply": { + "level0": "Austroasiatic", + "level1": "Mangic", + "level2": "Pakanic" + }, + "plz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic" + }, + "pma": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu" + }, + "pmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Barambo-Pambia" + }, + "pmc": { + "level0": "Unattested" + }, + "pmd": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria", + 
"level4": "Dhudhuroa-Pallanganmiddang" + }, + "pme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap" + }, + "pmf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Pamona-Tombelala" + }, + "pmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone" + }, + "pmi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Pumi" + }, + "pmj": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Pumi" + }, + "pml": { + "level0": "Pidgin", + "level1": "Romance-based pidgin" + }, + "pmm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)" + }, + "pmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai", + "level7": "Mundangic" + }, + "pmo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + 
"pmq": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pmr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "Apalic", + "level6": "Greater West Sogeram" + }, + "pms": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Piemontese-Lombard" + }, + "pmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "pmw": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan" + }, + "pmx": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Angami-Pochuri", + "level4": "Angami-Mao", + "level5": "Naga Maoic", + "level6": "Poumaic" + }, + "pmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay" + }, + "pmz": { + "level0": "Otomanguean", + "level1": "Western 
Otomanguean", + "level2": "Otopame-Chinantecan", + "level3": "Otopamean", + "level4": "Pamean" + }, + "pna": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "pnb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic" + }, + "pnc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu" + }, + "pnd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbundu (H.20)" + }, + "pne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan", + "level8": "Western Penan-Sebop" + }, + "png": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "pnh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East 
Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "pni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner" + }, + "pnk": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca" + }, + "pnl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "pnm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "pnn": { + "level0": "Piawi" + }, + "pno": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup" + }, + "pnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "pnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi" + }, + "pnr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum", + "level5": "Panim-Isebe-Bau" + }, + "pns": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Mongondowic" + }, + "pnt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Pontic-Cappadocian Greek" + }, + "pnu": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Jiongnai-Ho Ne" + }, + "pnv": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Kanyara" + }, + "pnw": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Panytyima-Yinhawangka" + }, + "pnx": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Pramic" + }, + "pny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Ngembaic" + }, + "pnz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum", + "level6": "Karangic", + "level7": "Kare-Pana" + }, + "poa": { + "level0": "Bookkeeping" + }, + "pob": { + "level0": "Bookkeeping" + }, + "poc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Poqom" + }, + "pod": { + "level0": "Bookkeeping" + }, + "poe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": 
"Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan" + }, + "pof": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke" + }, + "poh": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Poqom" + }, + "poi": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque" + }, + "poj": { + "level0": "Bookkeeping" + }, + "pol": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic", + "level6": "Polish-Silesian" + }, + "pom": { + "level0": "Pomoan" + }, + "pon": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Ponapeic" + }, + "poo": { + "level0": "Pomoan", + "level1": "Russian River and Eastern", + "level2": "Russian River", + "level3": "Northern-Central Pomoan" + }, + "pop": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Nmi-Pij-Fwa-Pam-Pap" + }, + "poq": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque", + "level3": "Texistepec-Ayapa Zoque" + }, + "por": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Brazil-Portugal Portuguese" + }, + "pos": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "pot": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Ojibwa-Potawatomi" + }, + "pou": { + "level0": "Bookkeeping" + }, + "pov": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Upper Guinea Portuguese" + }, + "pow": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan", + "level7": "Southwestern Popolocan", + "level8": "Tepexi-Zapotitlan" + }, + "pox": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic" + }, + "poy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Kilombero" + }, + "ppa": { + "level0": 
"Bookkeeping" + }, + "ppi": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Pai" + }, + "ppk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Uma-Sarudu" + }, + "ppl": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec", + "level4": "Eastern Nahuatl", + "level5": "Isthmus-Pipil Nahuatl" + }, + "ppm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen" + }, + "ppn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic" + }, + "ppo": { + "level0": "Teberan" + }, + "ppq": { + "level0": "Walioic", + "level1": "Pai-Sinen-Walio" + }, + "ppr": { + "level0": "Bookkeeping" + }, + "pps": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Ixcatec-Chocho-Popolocan", + "level5": "Chocho-Popolocan", + "level6": "Popolocan" + }, + "ppt": { + "level0": "Kamula-Elevala", + "level1": "Elevala" + }, + "ppu": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian", + "level2": "Central Western Plains" + }, + "ppv": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "pqa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + 
"level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "pqm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Northern Eastern Algonquian", + "level6": "Micmacic" + }, + "prb": { + "level0": "Bookkeeping" + }, + "prc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Ormuri-Parachi" + }, + "prd": { + "level0": "Bookkeeping" + }, + "pre": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Lower Guinea Portuguese" + }, + "prf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Northeastern Luzon", + "level4": "Nuclear Northeastern Luzon", + "level5": "Paranan-Pahanan" + }, + "prg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic" + }, + "prh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Peripheral Central Bisayan" + }, + "pri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", 
+ "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Cem-Pac" + }, + "prk": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "prl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "West-Central South American Sign", + "level5": "Peruvian-Inmaculada Sign" + }, + "prn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani" + }, + "pro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "Occitanic" + }, + "prp": { + "level0": "Bookkeeping" + }, + "prq": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Kampa-Amuesha", + "level3": "Pre-Andine Maipuran", + "level4": "Asha-Ashe-Kak-Matsi-Nan", + "level5": "Asha-Ashe-Kak", + "level6": "Ashe-Asha", + "level7": "Ashe-Asha Norte" + }, + "prr": { + "level0": "Puri-Coroado" + }, + "prs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic" + }, + "prt": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Tinic", + "level4": "Tin" + }, + "pru": { + "level0": "South Bird's Head Family" + }, + "prv": { + "level0": "Bookkeeping" + }, + "prw": { + "level0": "Nuclear Trans New Guinea", + 
"level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "prx": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Shamskatic" + }, + "pry": { + "level0": "Bookkeeping" + }, + "prz": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Providencia-Cayman Sign" + }, + "psa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "psc": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "psd": { + "level0": "Sign Language", + "level1": "Auxiliary Sign Systems" + }, + "pse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "South Sumatra Malay" + }, + "psg": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "psh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Western Pashayi" + }, + "psi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Pashayi", + "level5": "Eastern Pashayi" + }, + "psl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "psm": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid" + }, + "psn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater 
South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko", + "level5": "Panasuanic" + }, + "pso": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "psp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "psq": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "psr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign" + }, + "pss": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "pst": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto", + "level5": "Nuclear Pashto" + }, + "psu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic" + }, + "psw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula", + "level9": "Southeastern Malakula linkage", + "level10": "Port Sandwich-Axamb-Avok" + }, + "psy": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Nanticoke-Conoy" + }, + "pta": { + 
"level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup I.A", + "level8": "Paraguay-Brazil Guarani", + "level9": "Kaiowa" + }, + "pth": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian" + }, + "pti": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Pintupic", + "level4": "Nuclear Pintupic", + "level5": "Wangkatja-Tjarra" + }, + "ptn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "pto": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Wayampi-Zoe-Emerillon", + "level7": "Zoe-Emerillon" + }, + "ptp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Zenag-Patep" + }, + "ptq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "ptr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "ptt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "ptu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "ptv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym", + "level7": "Orkon-West Ambrym", + "level8": "West Ambrym", + "level9": "Southwest Ambrym" + }, + "ptw": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "pty": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "pua": { + "level0": "Tarascan" + }, + "pub": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Central Old Kuki" + }, + "puc": { + "level0": "Bookkeeping" + }, + "pud": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "puf": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "pug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi" + }, + "puj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "puk": { + "level0": "Bookkeeping" + }, + "pum": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "puo": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram" + }, + "pup": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "pur": { + "level0": "Tupian", + "level1": "Purubora-Ramarama" + }, + "put": { + "level0": "Bookkeeping" + }, + "puu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": 
"Punu-Vungu" + }, + "puw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Eastern Trukic", + "level12": "Puluwatese-Pollapese" + }, + "pux": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills" + }, + "puy": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "puz": { + "level0": "Bookkeeping" + }, + "pwb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Boze-Loro" + }, + "pwg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage", + "level10": "Boanaki-Paiwa" + }, + "pwi": { + "level0": "Wintuan" + }, + "pwm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Southern Palawanic", + "level5": "Molbog-Palawan" + }, + "pwn": { + "level0": "Austronesian" + }, + "pwo": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Eastern-Western Pwo Karen" + }, + "pwr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + 
"level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Eastern Hindi" + }, + "pww": { + "level0": "Sino-Tibetan", + "level1": "Karenic", + "level2": "Peripheral Karen", + "level3": "Pwo", + "level4": "Northern Pwo Karen" + }, + "pxm": { + "level0": "Bookkeeping" + }, + "pye": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo" + }, + "pym": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Southeastern Benue-Congo Plateau", + "level5": "Horom-Fyem" + }, + "pyn": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Poyanawa Subgroup" + }, + "pys": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "pyu": { + "level0": "Austronesian" + }, + "pyx": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Unclassified Luish" + }, + "pyy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid", + "level7": "Bisu-Pyen-Laomian" + }, + "pze": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic" + }, + "pzh": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "pzn": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric" + }, + "qbb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic" + }, + "qcs": { + "level0": "Mixe-Zoque", + "level1": "Mixe" + }, + "qer": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "North Scandinavian", + "level6": "East-Central Swedic" + }, + "qgu": { + "level0": "Pama-Nyungan", + "level1": "Nyawaygic" + }, + "qhr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "qkn": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid", + "level6": "Nuclear Kannaoid" + }, + "qlm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Western Caribbean Creole", + "level14": "Jamaicanic" + }, + "qmx": { + "level0": "Bookkeeping" + }, + "qok": { + "level0": "Austroasiatic", + "level1": "Khmeric" + }, + "qpp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone" + }, + "qua": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dhegiha" + }, + "qub": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "quc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + 
"level4": "Core Quichean", + "level5": "Quiche-Achi" + }, + "qud": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "quf": { + "level0": "Quechuan", + "level1": "Cajamarca-Lambayeque Quechua" + }, + "qug": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "quh": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua", + "level3": "South Bolivian-Argentinian Quechua" + }, + "qui": { + "level0": "Chimakuan" + }, + "quj": { + "level0": "Bookkeeping" + }, + "quk": { + "level0": "Quechuan", + "level1": "San Martin-Amazonas Quechua" + }, + "qul": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua" + }, + "qum": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean" + }, + "qun": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Tsamosan", + "level3": "Coastal Tsamosan" + }, + "qup": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua", + "level6": "Pastaza Quechua" + }, + "quq": { + "level0": "Unclassifiable" + }, + "qur": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qus": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Bolivian-Argentinian Quechua", + "level3": "South Bolivian-Argentinian Quechua" + }, + "qut": { + "level0": "Bookkeeping" + }, + "quu": { + "level0": "Bookkeeping" + }, + "quv": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean" + }, + "quw": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador 
Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua" + }, + "qux": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Yauyosic" + }, + "quy": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Ayacuchan Quechua" + }, + "quz": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qva": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qvc": { + "level0": "Quechuan", + "level1": "Cajamarca-Lambayeque Quechua" + }, + "qve": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qvh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay" + }, + "qvi": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua" + }, + "qvj": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B" + }, + "qvl": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "qvm": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH", + "level4": "Panao-Union" + }, + "qvn": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Yaru Quechua" + }, + "qvo": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua" + }, + "qvp": { + "level0": "Quechuan", + "level1": "Quechua I" + }, + "qvs": { + "level0": "Quechuan", + "level1": "San Martin-Amazonas Quechua" + }, + "qvw": { + "level0": "Quechuan", + "level1": 
"Quechua I", + "level2": "Central Quechua I", + "level3": "Jauja-Huanca" + }, + "qvy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic" + }, + "qvz": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B", + "level3": "Imbabura-Colombia-Oriente Quechua", + "level4": "Colombia-Oriente Quechua", + "level5": "Oriente Quechua", + "level6": "Pastaza Quechua" + }, + "qwa": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Corongo-Sihuas" + }, + "qwc": { + "level0": "Quechuan" + }, + "qwh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay" + }, + "qws": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Corongo-Sihuas" + }, + "qwt": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan" + }, + "qxa": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH" + }, + "qxc": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Yauyosic" + }, + "qxh": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "AP-AM-AH", + "level4": "Panao-Union" + }, + "qxi": { + "level0": "Bookkeeping" + }, + "qxl": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua A" + }, + "qxn": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Conchucos" + }, + "qxo": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Huaylay", + "level4": "Conchucos" + }, + "qxp": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Cuscan Quechua" + }, + "qxq": { + 
"level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz" + }, + "qxr": { + "level0": "Quechuan", + "level1": "Colombia-Ecuador Quechua", + "level2": "Ecuadorian Quechua B" + }, + "qxs": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Qiang" + }, + "qxu": { + "level0": "Quechuan", + "level1": "Southern Quechua", + "level2": "Ayacuchan Quechua" + }, + "qxw": { + "level0": "Quechuan", + "level1": "Quechua I", + "level2": "Central Quechua I", + "level3": "Jauja-Huanca" + }, + "qya": { + "level0": "Artificial Language" + }, + "qyp": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian", + "level6": "Western Southern New England Algonquian" + }, + "raa": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "rab": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti" + }, + "rac": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain", + "level2": "Rasawa-Saponi" + }, + "rad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Rade-Jarai" + }, + "rae": { + "level0": "Bookkeeping" + }, + "raf": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Mewahang" + }, + "rag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Greater Luyia" + }, + "rah": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "rai": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Kandas-Duke of York" + }, + "rak": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II" + }, + "ral": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "ram": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira", + "level6": "Southeastern Timbira" + }, + "ran": { + "level0": "Kolopom", + "level1": "Kimaama-Riantana" + }, + "rao": { + "level0": "Ramu" + }, + "rap": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Distal", + "level13": "Far East Polynesian" + }, + "rar": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "ras": { + "level0": "Rashad" + }, + "rat": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "rau": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute", + "level2": "Raute-Rawat" + }, + "rav": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Khambu" + }, + "raw": { + "level0": "Sino-Tibetan", + "level1": "Nungish" + }, + "rax": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Mumuyic" + }, + "ray": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + 
"level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "raz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "rbb": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic" + }, + "rcf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil", + "level13": "Central Oil", + "level14": "Macro-French" + }, + "rdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Gilaki-Rudbari" + }, + "rea": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Upper Minjim" + }, + "reb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic" + }, + "ree": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "reg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": 
"Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Suguti" + }, + "rei": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "rej": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "rel": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "rem": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano" + }, + "ren": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "rer": { + "level0": "Unattested" + }, + "res": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Kainji Lake" + }, + "ret": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "rey": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik" + }, + "rga": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + 
"level8": "South Santo" + }, + "rge": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek", + "level8": "Nuclear Modern Greek" + }, + "rgk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh" + }, + "rgn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian", + "level12": "Emiliano-Romagnolo" + }, + "rgr": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Caqueta" + }, + "rgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "rgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote", + "level6": "Southeast Rote" + }, + "rhg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga" + }, + "rhp": { + "level0": "Nuclear Torricelli", + "level1": "Nuclear Maimai", + "level2": "Heyo-Yahang" + }, + "ria": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + 
"level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "rib": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rie": { + "level0": "Bookkeeping" + }, + "rif": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "ril": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Riang" + }, + "rim": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Nyaturu-Nilamba" + }, + "rin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic" + }, + "rir": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "rit": { + "level0": "Pama-Nyungan", + "level1": "Yuulngu", + "level2": "Southern Yolngu" + }, + "riu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Manggarai Khusus" + }, + "rjb": { + "level0": "Bookkeeping" + }, + "rjg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", + "level6": "Waerana-Razong" + }, + "rji": { + "level0": "Sino-Tibetan", + "level1": "Raji-Raute" + }, + "rjs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": 
"Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta", + "level11": "Western Kamta" + }, + "rka": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Mnong", + "level6": "Southern-Central Mnong" + }, + "rkb": { + "level0": "Nuclear-Macro-Je" + }, + "rkh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": "Southern East Polynesian Proximal" + }, + "rki": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic", + "level7": "Arakanese-Marma" + }, + "rkm": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "East Manding" + }, + "rkt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta" + }, + "rkw": { + "level0": "Bookkeeping" + }, + "rma": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Votic Chibchan" + }, + "rmb": 
{ + "level0": "Gunwinyguan", + "level1": "Jala" + }, + "rmc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmd": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "rme": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "British Romani" + }, + "rmf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "Northwestern Romani" + }, + "rmg": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register", + "level2": "Scandinavian Romani" + }, + "rmh": { + "level0": "Lepki-Murkim-Kembra" + }, + "rmi": { + "level0": "Speech Register", + "level1": "Indo-European Speech Register" + }, + "rmk": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Breri-Romkun" + }, + "rml": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + 
"level9": "Romani" + }, + "rmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Luangic-Kisaric", + "level5": "Kisaric" + }, + "rmn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "Northwestern Romani" + }, + "rmp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "rmq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmr": { + "level0": "Bookkeeping" + }, + "rms": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign" + }, + "rmt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone" + }, + "rmu": { + "level0": "Speech 
Register", + "level1": "Indo-European Speech Register", + "level2": "Scandinavian Romani" + }, + "rmv": { + "level0": "Artificial Language" + }, + "rmw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani", + "level10": "Anglo-Northwestern Romani", + "level11": "British Romani" + }, + "rmx": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Lamamic" + }, + "rmy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Romani" + }, + "rmz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic", + "level7": "Arakanese-Marma" + }, + "rna": { + "level0": "Unattested", + "level1": "Chocoan (Unattested)" + }, + "rnb": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rnd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu", + "level11": "Lunda-Ruund-Kete", + "level12": "Ruund-Kete" + }, + "rng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East 
Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)", + "level13": "Tsongan" + }, + "rnl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin" + }, + "rnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Biakic", + "level6": "Biak-Roon" + }, + "rnp": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Central-Eastern West Himalayish" + }, + "rnw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "rob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "roc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Chruic" + }, + "rod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga" + }, + "roe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + 
"level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya" + }, + "rof": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga" + }, + "rog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Aceh-Chamic", + "level4": "Chamic", + "level5": "Chru-Northern Cham", + "level6": "Northern Cham" + }, + "roh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian" + }, + "rol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan" + }, + "ron": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian", + "level9": "Eastern Romanian" + }, + "roo": { + "level0": "North Bougainville", + "level1": "Rotokas-Askopan" + }, + "rop": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + 
"level10": "Macro-English", + "level11": "Pacific Creole English" + }, + "ror": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Ngada" + }, + "rou": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Runga-Kibet" + }, + "row": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "West Rote" + }, + "rpt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Unclassified Hanseman" + }, + "rri": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "East Choiseul" + }, + "rro": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "West Central Papuan linkage", + "level9": "Nuclear West Central Papuan linkage" + }, + "rrt": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama" + }, + "rsi": { + "level0": "Artificial Language" + }, + "rsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "rsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "rsn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": 
"Unclassified L1 Sign Language" + }, + "rsw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru", + "level9": "Voric" + }, + "rtc": { + "level0": "Bookkeeping", + "level1": "Pending Report Release" + }, + "rth": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Southern Sangiric" + }, + "rtm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage" + }, + "rtw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Rathawi-Palya" + }, + "rub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu" + }, + "ruc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "rue": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic", + "level5": "Ukrainian-Rusyn" + }, + "ruf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu" + }, + "rug": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia", + "level11": "Rovianic" + }, + "ruh": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Kochic" + }, + "rui": { + "level0": "Bookkeeping" + }, + "ruk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Ninzic", + "level5": "Rukubic" + }, + "run": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic" + }, + "ruo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian" + }, + "rup": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance" + }, + "ruq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": 
"Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Eastern Romance", + "level8": "Northern Romanian", + "level9": "Eastern Romanian" + }, + "rus": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic" + }, + "rut": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Western Samur" + }, + "ruu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic", + "level7": "Upper Kinabatangan-Lobu" + }, + "ruy": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "ruz": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "rwa": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills", + "level3": "Rawo-Main Serra" + }, + "rwk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "West Kilimanjaro" + }, + "rwm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Terrien", + "level11": "Ngombe-Ababuan", + "level12": "Ababuan", + "level13": "Old Bomokandian", + "level14": "Komoic" + }, + "rwo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + "level4": 
"Ufim-Rawa-Nahu" + }, + "rwr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "rws": { + "level0": "Bookkeeping" + }, + "rxd": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "rxw": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Mithaka-Karuwali" + }, + "ryn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Oshima" + }, + "rys": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu", + "level3": "Macro-Yaeyama" + }, + "ryu": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Okinawa" + }, + "rzh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic", + "level5": "Modern Sayhadic" + }, + "saa": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "sab": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Eastern Isthmic Chibchan", + "level4": "Guaymiic" + }, + "sac": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian", + "level4": "Fox" + }, + "sae": { + "level0": "Nambiquaran" + }, + "saf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + 
"level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare" + }, + "sag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic", + "level8": "Sangoic" + }, + "sah": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Sakha-Dolgan" + }, + "saj": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan", + "level4": "Sahu-Waioli" + }, + "sak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic", + "level9": "Sake-Ndambomo" + }, + "san": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan" + }, + "sap": { + "level0": "Bookkeeping" + }, + "saq": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Lotuxo-Maa", + "level4": "Ongamo-Maa", + "level5": "Nuclear Maa" + }, + "sar": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray" + }, + "sas": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa", + "level3": "Sasak-Sumbawa" + }, + "sat": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Santalic" + }, + "sau": { + "level0": "Austronesian", 
+ "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers", + "level4": "Amalumute", + "level5": "Northwest Seram" + }, + "sav": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar" + }, + "saw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Unclassified Awyu-Dumut" + }, + "sax": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost" + }, + "say": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "saz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic" + }, + "sba": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Central", + "level6": "Sara Central Logone" + }, + "sbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "West New Georgia", + "level11": "Simboic" + }, + "sbc": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere" + }, + "sbd": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "sbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic", + "level8": "Suau chain" + }, + "sbg": { + "level0": "West Bird's Head", + "level1": "Seget-Moi" + }, + "sbh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus I" + }, + "sbi": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei" + }, + "sbj": { + "level0": "Maban", + "level1": "Mabang", + "level2": "Maba-Masalit", + "level3": "Macro-Masalit" + }, + "sbk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mbeya" + }, + "sbl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Abellen-Botolan" + }, + "sbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": 
"Ruvu", + "level10": "West Ruvu", + "level11": "Vidunda-Sagala" + }, + "sbn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Unclassified Sindhic" + }, + "sbo": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam", + "level6": "Lanohic" + }, + "sbp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Wanji-Sangu" + }, + "sbq": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Sogeram", + "level5": "North Sogeram" + }, + "sbr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic", + "level8": "Selungai-Sembakung Murut" + }, + "sbs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Machili" + }, + "sbu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti" + }, + "sbw": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Northern Okani", + "level10": "Himba-Pinji" + }, + "sbx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Ibanic", + "level5": "Iban-Mualang-Seberuang", + "level6": "Iban-Seberuang" + }, + "sby": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe" + }, + "sbz": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Saraic", + "level5": "Sara Peripherique", + "level6": "Barh Keita", + "level7": "Sara-Kaba" + }, + "sca": { + "level0": "Bookkeeping" + }, + "scb": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic", + "level3": "East Chutic" + }, + "sce": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic", + "level3": "Shirongol", + "level4": "Baoanic" + }, + "scg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "sch": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin", + "level5": "Mizoic", + "level6": "Hmaric" + }, + "sci": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay" + }, + "sck": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania", + "level12": "India-Nepal-Bangladesh Sadri" + }, + "scl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "scn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Italo-Dalmatian", + "level9": "Italian Romance" + }, + "sco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic" + }, + "scp": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Yolmo-Kagate" + }, + "scq": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic", + "level3": "Southern Chong" + }, + "scs": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic", + "level5": "Slave" + }, + "sct": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Eastern 
Bru-Katang", + "level5": "Katang" + }, + "scu": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric", + "level5": "Thebor" + }, + "scv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos" + }, + "scw": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Ronic" + }, + "scx": { + "level0": "Unclassifiable" + }, + "sda": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "sdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Gorani", + "level9": "Shabak-Bajalani" + }, + "sdc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "sde": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Kauru", + "level9": "Voric" + }, + "sdg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", 
+ "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic" + }, + "sdh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Laki-Kurdish", + "level8": "Kurdish" + }, + "sdi": { + "level0": "Bookkeeping" + }, + "sdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Nuclear Northern Kikongo" + }, + "sdk": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic", + "level3": "Iatmulic" + }, + "sdl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Arab Sign" + }, + "sdm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "sdn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Corsic" + }, + "sdo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh" + }, + "sdp": { + "level0": 
"Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Western Kho-Bwa", + "level3": "Sartang-Sherdukpen" + }, + "sdr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania", + "level12": "India-Nepal-Bangladesh Sadri" + }, + "sds": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Zuara-Sened" + }, + "sdt": { + "level0": "Bookkeeping" + }, + "sdu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Uma-Sarudu" + }, + "sdx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Melanau", + "level6": "Sibu-Kanowit-Tanjong" + }, + "sea": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic" + }, + "sec": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "North Georgia Central Salish" + }, + "sed": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam", + "level4": "Hre-Sedang" + }, + "see": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "sef": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo" + }, + "seg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + 
"level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Mijikenda", + "level12": "Southern Mijikenda" + }, + "seh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "sej": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Trans Vitiaz", + "level5": "Huon Tip" + }, + "sek": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Beaver-Sekani" + }, + "sel": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Kamas-Selkup" + }, + "sen": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "sep": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo", + "level5": "Supyiric" + }, + "seq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo" + }, + "ser": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Serran" + }, + "ses": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "set": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic", + "level2": "Sentani-Nafri" + }, + "seu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + 
"level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Central Yapen", + "level8": "Serui-Busami" + }, + "sev": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo" + }, + "sew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Dobu-Duau linkage" + }, + "sey": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya" + }, + "sez": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic" + }, + "sfb": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign", + "level4": "Belgian Sign" + }, + "sfe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "sfm": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Nuclear Hmongic", + "level4": "West Hmongic", + "level5": "Greater Chuanqiandian", + "level6": "Chuanqiandian" + }, + "sfs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "South African Sign" + }, + "sfw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Bia", + "level8": "Northern Bia" + }, + "sga": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", 
+ "level4": "Core Celtic", + "level5": "Insular Celtic", + "level6": "Goidelic" + }, + "sgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Mag-Ayta" + }, + "sgc": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin" + }, + "sgd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Surigao" + }, + "sge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Modang-Segai" + }, + "sgg": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "DGSic" + }, + "sgh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami", + "level6": "Shughnic" + }, + "sgi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute" + }, + "sgk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Bisoid" + }, + "sgm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza" + }, + "sgo": { + "level0": "Bookkeeping" + }, + "sgp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": 
"Jingpho" + }, + "sgr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "sgt": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic" + }, + "sgu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "sgw": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Outer South Ethiopic", + "level6": "TT-Group" + }, + "sgx": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "sgy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Sanglechi-Ishkashimi" + }, + "sgz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "sha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Beromic" + }, + "shb": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame" + }, + "shc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Northern Njila", + "level9": "Mbala-Holu-Sondi (K.10)", 
+ "level10": "Mbala-Sondi" + }, + "shd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic" + }, + "she": { + "level0": "Dizoid" + }, + "shg": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe" + }, + "shh": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Central Numic" + }, + "shi": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber" + }, + "shj": { + "level0": "Dajuic", + "level1": "Eastern Dajuic" + }, + "shk": { + "level0": "Nilotic", + "level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo" + }, + "shl": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "shm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "shn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic" + }, + "sho": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Busan", + "level5": "Kyenga-Shanga" + }, + "shp": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": 
"Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup", + "level5": "Shipibo-Konibo-Kapanawa" + }, + "shq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "shr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu" + }, + "shs": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish", + "level3": "Thompsonic" + }, + "sht": { + "level0": "Shastan", + "level1": "Nuclear Shastan" + }, + "shu": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Egyptic Arabic", + "level7": "Egypto-Sudanic Arabic", + "level8": "Sudanese-Chadian Arabic" + }, + "shv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Eastern MSA" + }, + "shw": { + "level0": "Heibanic", + "level1": "West-Central Heibanic" + }, + "shx": { + "level0": "Hmong-Mien", + "level1": "Hmongic", + "level2": "Nuclear Hmongic-Ho Ne", + "level3": "Jiongnai-Ho Ne", + "level4": "Ho Neic" + }, + "shy": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic" + }, + "sia": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "sib": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan", + "level7": "Penan", + "level8": "Western Penan-Sebop" + }, + "sic": { + "level0": "Bookkeeping" + }, + "sid": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Highland East Cushitic", + "level4": "Sidaama-Hadiyya-Kambaata", + "level5": "Sidaama-Gedeo" + }, + "sie": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Greater Luyana", + "level8": "Western Greater Luyana", + "level9": "Simaaic" + }, + "sig": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala" + }, + "sih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Southern New Caledonian", + "level8": "Mid-Southern New Caledonian" + }, + "sij": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf" + }, + "sik": { + "level0": "Bookkeeping" + }, + "sil": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": 
"Sisaala-Chakali", + "level10": "Sisaala" + }, + "sim": { + "level0": "Sepik", + "level1": "Nukuma", + "level2": "Kwanga-Mende" + }, + "sin": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala", + "level6": "Sinhalaic" + }, + "sip": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Southern Tibetic", + "level7": "Dzongkhic" + }, + "siq": { + "level0": "Bosavi", + "level1": "Bosavi Watershed", + "level2": "Kaluli-Sunia" + }, + "sir": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Western West Chadic B.2" + }, + "siu": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Galu-Alu" + }, + "siv": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill" + }, + "siw": { + "level0": "South Bougainville", + "level1": "Buinic" + }, + "six": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka" + }, + "siy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic" + }, + "siz": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Libyan-Egyptian Oases Berber" + }, + "sja": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan" + }, + "sjb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + 
"level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Punan Tubu-Bah" + }, + "sjd": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Peninsular Eastern Saami" + }, + "sje": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami", + "level4": "Lule-Pite Saami" + }, + "sjg": { + "level0": "Tamaic", + "level1": "Tama-Sungor-Miisiirii", + "level2": "Tama-Sungor" + }, + "sjk": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "sjl": { + "level0": "Sino-Tibetan", + "level1": "Miji" + }, + "sjm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Borneo Coast Bajaw" + }, + "sjn": { + "level0": "Artificial Language" + }, + "sjo": { + "level0": "Tungusic", + "level1": "Manchu-Jurchen", + "level2": "Manchu-Xibe" + }, + "sjp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Kamrupa", + "level10": "Kamta", + "level11": "Western Kamta" + }, + "sjr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "sjs": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber", + "level4": "Northwestern Moroccan Berber" + }, + 
"sjt": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Peninsular Eastern Saami" + }, + "sju": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Southwestern Saami" + }, + "sjw": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Great Lakes Algonquian" + }, + "skb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek" + }, + "skc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap", + "level4": "Sauk-Nimi" + }, + "skd": { + "level0": "Miwok-Costanoan", + "level1": "Miwokan", + "level2": "Eastern Miwokan", + "level3": "Sierra Miwokan" + }, + "ske": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost", + "level7": "Seke-Sowa" + }, + "skf": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Corumbiara" + }, + "skg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Western Malagasic" + }, + "skh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran", + "level3": "Central Barrier Islands" + }, + "ski": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata" + }, + "skj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + 
"level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic" + }, + "skl": { + "level0": "Bookkeeping" + }, + "skm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Sakam-Som" + }, + "skn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Western Subanen" + }, + "sko": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko" + }, + "skp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Melanau-Kajang", + "level5": "Kajang" + }, + "skq": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Soninkean" + }, + "skr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Hindko-Siraiki", + "level10": "Siraikic" + }, + "sks": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "skt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": 
"Kwa-Kasai North", + "level15": "Sakata-Tiinic" + }, + "sku": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo" + }, + "skv": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic" + }, + "skw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "skx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "Rampi-Seko-Badaic", + "level4": "Seko" + }, + "sky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian" + }, + "skz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "slb": { + "level0": "Bookkeeping" + }, + "slc": { + "level0": "Saliban" + }, + "sld": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala", + "level11": "Northwestern Sisaala" + }, + "sle": { + "level0": "Dravidian", + 
"level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "slf": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "slg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Eastern Murutic", + "level8": "Selungai-Sembakung Murut" + }, + "slh": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Lushootseed-Puget" + }, + "sli": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "slj": { + "level0": "Bookkeeping" + }, + "slk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Czech-Slovak" + }, + "sll": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "slm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo" + }, + "slp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Lembata", + "level4": "Lamaholot Barat", + "level5": "Flores Lamaholot" + }, + "slq": { + "level0": "Bookkeeping" + }, + "slr": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz" + }, + "sls": { + "level0": "Bookkeeping" + }, + "slt": { + "level0": "Sino-Tibetan", 
+ "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Sila-Wanya-Cosao" + }, + "slu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "South Tanimbar" + }, + "slv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "slw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Eastern Huon", + "level4": "Kalasa" + }, + "slx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Chokwe-Lunda", + "level10": "Ruund-Salampasu" + }, + "sly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Makassaric" + }, + "slz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "sma": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Southwestern Saami" + }, + "smb": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Baruya-Simbari" + }, + "smc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa", + "level4": "Sakam-Som" + }, + "sme": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami" + }, + "smf": { + "level0": "Border", + 
"level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Simog-Daonda" + }, + "smg": { + "level0": "Baining" + }, + "smh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "smj": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Western Saami", + "level3": "Central Western Saami", + "level4": "Lule-Pite Saami" + }, + "smk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Tina-Bolinao" + }, + "sml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "smm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "smn": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "smo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic", + "level10": "Samoan-Tokelauan" + }, + "smp": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + 
"level6": "Hebrewic" + }, + "smq": { + "level0": "East Strickland", + "level1": "Kubo-Samo-Bibo" + }, + "smr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sumatran" + }, + "sms": { + "level0": "Uralic", + "level1": "Saami", + "level2": "Eastern Saami", + "level3": "Mainland Eastern Saami" + }, + "smt": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "smu": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic" + }, + "smv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "smw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bali-Sasak-Sumbawa", + "level3": "Sasak-Sumbawa" + }, + "smx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Hungan-Samba" + }, + "smy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian" + }, + "smz": { + "level0": "South Bougainville", + "level1": "Nasioiic", + "level2": "Nasioi", + "level3": "Simekuic" + }, + "sna": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona", + "level11": "Central Shona" + }, + "snc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Sinagoro-Keapara" + }, + "snd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic", + "level9": "Sindhi-Kachchi" + }, + "sne": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh", + "level5": "Central-Western Bidayuh" + }, + "snf": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Cangin", + "level3": "Saafi-Noon-Lehar", + "level4": "Noon-Lehar" + }, + "sng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde", + "level9": "Kaonde-Shaba-Sanga" + }, + "snh": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "sni": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Chama subgroup" + }, + "snj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic", + "level8": "Sangoic" + }, + "snk": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Soninke-Bozo", + "level4": "Soninkean" + }, + "snl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric", + "level4": "Sangil-Sangir" + }, + "snm": { + "level0": "Central Sudanic", + "level1": "Moru-Madi", + "level2": "Southern Moru-Madi" + }, + "snn": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Napo Tucanoan", + "level3": "Siona-Secoya", + "level4": "Sionan" + }, + "snp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "snq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira" + }, + "snr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Gum" + }, + "sns": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": 
"Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula", + "level10": "Southwest Coastal Malekula" + }, + "snu": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "snv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "snw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo", + "level4": "Lelemic", + "level5": "Likpe-Santrokofi" + }, + "snx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Mindjim", + "level4": "Lower Minjim", + "level5": "Inland Minjim" + }, + "sny": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Western Sepik Hill" + }, + "snz": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia" + }, + "soa": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Black Tai" + }, + "sob": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic", + "level9": "Sobei-Liki" + }, + "soc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire 
River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Basoo" + }, + "sod": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Songola-Binja" + }, + "soe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Nkutsu-Lokenye", + "level12": "Songomenic" + }, + "sog": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic", + "level7": "Sogdian-Yagnobi" + }, + "soh": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "soi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu", + "level11": "Dangaura-Khuna-Sonaha" + }, + "soj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Central Iran Kermanic", + "level8": "Nuclear Central Iran Kermanic", + "level9": "Kashanic" + }, + "sok": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + 
"level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "sol": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic" + }, + "som": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "soo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Nsong-Mpiin-Ngong" + }, + "sop": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde" + }, + "soq": { + "level0": "Dagan", + "level1": "Southeast Dagan" + }, + "sor": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Sarwa-Sumray" + }, + "sos": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Northwestern Mande", + "level3": "Duun-Bobo", + "level4": "Duun-Jo", + "level5": "Duun-Seenku" + }, + "sot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana", + "level13": "Sesotho-Lozi" + }, + "sou": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "sov": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic", + "level10": "Sonsorol-Tobi" + }, + "sow": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Waina-Punda" + }, + "sox": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Western A80", + "level10": "Makaaic", + "level11": "Southern Makaaic" + }, + "soy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Unclassified North Volta-Congo" + }, + "soz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Gikuyu-Temi" + }, + "spa": { 
+ "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic" + }, + "spb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Eastern Littoral Piru Bay" + }, + "spd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon", + "level5": "Ganglau-Saep" + }, + "spe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Bam-Manam", + "level10": "Manam-Sepa" + }, + "spg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Aput-Busang-Merah-Kohi" + }, + "spi": { + "level0": "Lakes Plain", + "level1": "Far West Lakes Plain", + "level2": "Rasawa-Saponi" + }, + "spk": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Sawosic" + }, + "spl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum", + "level6": "Selepet-Komba" + }, + "spm": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ruboni", + "level3": "Mikarewan" + }, + "spn": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + 
"spo": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Southern Interior Salish", + "level3": "Okanaganic", + "level4": "Kalispel-Spokane" + }, + "spp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "North Senufo", + "level5": "Supyiric" + }, + "spq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic" + }, + "spr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Uliase", + "level8": "Hatuhaha", + "level9": "Saparuan", + "level10": "Saparua-Latu" + }, + "sps": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz" + }, + "spt": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Lahauli-Spiti", + "level7": "Spiti-Jad" + }, + "spu": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric" + }, + "spv": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Macro-Oriya" + }, + "spy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Elgon-Mau Kalenjin" + }, + "sqa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Kamuku-Hungwarya", + "level7": "Kamuku", + "level8": "Rogo-Sagamuk-Sama-Sambuga", + "level9": "Sagamuk-Sama-Sambuga" + }, + "sqh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "Lameic" + }, + "sqk": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sqm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gbaya-Manza-Ngbaka", + "level4": "Gbaya Meridional-Occidental", + "level5": "Bokoto-Gbeya", + "level6": "Gbeya", + "level7": "Gbeya-Suma" + }, + "sqn": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "sqo": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Komisenian" + }, + "sqq": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Nuclear West Bahnaric", + "level4": "Loven-Suq" + }, + "sqs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic" + }, + "sqt": { + "level0": "Afro-Asiatic", + "level1": 
"Semitic", + "level2": "West Semitic", + "level3": "Modern South Arabian", + "level4": "Eastern MSA" + }, + "squ": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "South Georgia Central Salish" + }, + "sqx": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sra": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic" + }, + "srb": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "Sora-Juray-Gorum", + "level3": "Sora-Juray" + }, + "src": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Sardinian" + }, + "sre": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Benyadu-Bekati", + "level4": "Bakati'", + "level5": "Rara-Sara Bakati'" + }, + "srf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu", + "level10": "Musom-Sirak" + }, + "srg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "West Bisayan", + "level6": "Kinarayan" + }, + "srh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami", + "level6": "Shughnic" + }, + "sri": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Western 
Eastern Tucanoan", + "level3": "Cubeo-Desano", + "level4": "Yupua-Siriano-Desano", + "level5": "Siriano-Desano" + }, + "srk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "srl": { + "level0": "Greater Kwerba" + }, + "srm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English" + }, + "srn": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Surinamese Creole English", + "level13": "Eastern Maroons" + }, + "sro": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Southern Romance", + "level8": "Sardo-Corsican", + "level9": "Sardinian" + }, + "srp": { + "level0": "Indo-European", + "level1": "Balto-Slavic" + }, + "srq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "srr": { + "level0": "Atlantic-Congo", 
+ "level1": "North-Central Atlantic", + "level2": "Fula-Sereer" + }, + "srs": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan" + }, + "srt": { + "level0": "Geelvink Bay", + "level1": "Barapasi-Sauri-Kofei", + "level2": "Sauri-Kofei" + }, + "sru": { + "level0": "Tupian", + "level1": "Monde" + }, + "srv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan" + }, + "srw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Teun-Nila-Serua", + "level5": "Nila-Serua" + }, + "srx": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Nuclear Himachali" + }, + "sry": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano" + }, + "srz": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Caspian", + "level8": "Mazanderani-Shahmirzadi" + }, + "ssb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "ssc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + 
"level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara", + "level12": "Kuriaic" + }, + "ssd": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Kabenau" + }, + "sse": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw", + "level5": "Sulu-Borneo", + "level6": "Inner Sulu Sama" + }, + "ssf": { + "level0": "Austronesian", + "level1": "Western Plains Austronesian" + }, + "ssg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands" + }, + "ssh": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Arabian Peninsula Arabic", + "level7": "North Arabian Beduin Arabic", + "level8": "Dhofaric" + }, + "ssi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Greater Panjabic", + "level9": "Eastern Panjabic" + }, + "ssj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia", + "level5": "Kesawai-Wia" + }, + "ssk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Central-Eastern West Himalayish" + }, + "ssl": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Sisaala", + "level11": "Northwestern Sisaala" + }, + "ssm": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar", + "level5": "Lanoh-Semnam" + }, + "ssn": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Oromoid", + "level7": "Nuclear Oromo", + "level8": "Central-Eastern Oromo", + "level9": "South-East-North Oromo" + }, + "sso": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Sera-Sissano", + "level10": "Sissanoic" + }, + "ssp": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "ssr": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic" + }, + "sss": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "West Katuic", + "level3": "Brou-So", + "level4": "Western Bru-So" + }, + "sst": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Simbu", + "level3": "Nuclear Simbu", + "level4": "Golinic" + }, + "ssu": { + "level0": "Angan", + "level1": "Nuclear Angan", + "level2": "Wojokesic", + "level3": "Kamasa-Susuami" + }, + "ssv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": 
"Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "Shark Bayic" + }, + "ssw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland", + "level14": "Swatic" + }, + "ssx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Enga-Kewa-Huli", + "level2": "Kewa-Huli", + "level3": "Sau-Angal-Kewa" + }, + "ssy": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Saho-Afar" + }, + "ssz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "sta": { + "level0": "Pidgin", + "level1": "Swahili-based pidgin", + "level2": "Upcountry Swahili" + }, + "stb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "stc": { + "level0": "Bookkeeping" + }, + "std": { + "level0": "Unattested" + }, + "ste": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "East Seram", + "level4": "Setic" + }, + "stf": { + "level0": "Nuclear Torricelli", + "level1": "West Wapei" + }, + "stg": { + "level0": 
"Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Triengic" + }, + "sth": { + "level0": "Speech Register", + "level1": "Irish-English" + }, + "sti": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "stj": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "stk": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda" + }, + "stm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic", + "level9": "Faiwol-Seltaman" + }, + "stn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Longgu-Malaita-Makira", + "level6": "Malaita-Makira", + "level7": "Makira" + }, + "sto": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Dakotan", + "level3": "Nakoda" + }, + "stp": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "stq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Frisian" + }, + "str": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish", + "level3": "Straits Salish" + }, + "sts": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Gawarbatic", + "level5": "Shumashtic" + }, + "stt": 
{ + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "South Bahnaric", + "level4": "Mnong-Stieng-Chrau", + "level5": "Stieng" + }, + "stu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Bulangic" + }, + "stv": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage", + "level6": "Silte-Wolane" + }, + "stw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + "level10": "Central Trukic", + "level11": "Satawalese-Carolinian" + }, + "sty": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak" + }, + "sub": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Yaka-Suku" + }, + "suc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Western Subanen" + }, + "sue": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + "level3": "North Binanderean" + }, + "suf": { + "level0": "Bookkeeping" + }, + "sug": { + "level0": 
"Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Mianic" + }, + "suh": { + "level0": "Bookkeeping" + }, + "sui": { + "level0": "Suki-Gogodala" + }, + "suj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu", + "level12": "Rundic", + "level13": "Hangaza-Shubi" + }, + "suk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)", + "level9": "Nyamwezic" + }, + "sun": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian" + }, + "suo": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" + }, + "suq": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic", + "level3": "Pastoral Surmic", + "level4": "Tirma-Chai-Mursi" + }, + "sur": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3" + }, + "sus": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Susu-Yalunka" + }, + "sut": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec" + }, + "suu": { + "level0": "Bookkeeping" + }, + "suv": { + "level0": "Sino-Tibetan", + "level1": "Kho-Bwa", + "level2": "Puroikic" + }, + "suw": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Sukuma-Nyamwezi (F.20)" + }, + "suy": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "suz": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Northwestern Kiranti", + "level5": "Bahing-Sunwar" + }, + "sva": { + "level0": "Kartvelian" + }, + "svb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau" + }, + "svc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole" + }, + "sve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "svk": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Central European Sign", + "level4": "Nuclear Central European Sign" + }, + "svm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "South Slavic", + "level5": "Western South Slavic" + }, + "svr": { + "level0": 
"Bookkeeping" + }, + "swa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid" + }, + "swb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu", + "level12": "Shindzwani-Shimaore" + }, + "swc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "swe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "North Germanic", + "level5": "North Scandinavian", + "level6": "East-Central Swedic", + "level7": "East Swedic" + }, + "swf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere", + "level9": "Tagbu-Sere" + }, + "swg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "North Alemannic" + }, + "swh": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili", + "level11": "Swahili (G.40)", + "level12": "Mombasa-Lamu-Inland Swahili" + }, + "swi": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS", + "level4": "Maonan-Mak-Sui" + }, + "swj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Sangu-Sira", + "level25": "Sira-Barama" + }, + "swk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Sena-Nyanja", + "level9": "Senaic" + }, + "swl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Swedish Sign" + }, + "swm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "swn": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": 
"Libyan-Egyptian Oases Berber" + }, + "swo": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "swp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic", + "level8": "Suau chain" + }, + "swq": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Sharwa-Tsuvan" + }, + "swr": { + "level0": "Yawa-Saweru" + }, + "sws": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "South Tanimbar" + }, + "swt": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "East Alor", + "level3": "Sawila-Wersing" + }, + "swu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Gorontalo-Mongondow", + "level4": "Gorontalic" + }, + "swv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani" + }, + "sww": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "South Pentecost", + "level7": "Seke-Sowa" + }, + "swx": { + "level0": "Arawan" + }, + "swy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + 
"level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Sarwa-Sumray" + }, + "sxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "North Mara" + }, + "sxc": { + "level0": "Unclassifiable" + }, + "sxe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic" + }, + "sxg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Naic" + }, + "sxk": { + "level0": "Kalapuyan" + }, + "sxm": { + "level0": "Bookkeeping" + }, + "sxn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric", + "level4": "Sangil-Sangir" + }, + "sxr": { + "level0": "Austronesian", + "level1": "Tsouic", + "level2": "Kanakanavu-Saaroa" + }, + "sxs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Igwic", + "level7": "Sasaru-Igwe" + }, + "sxu": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German" + }, + "sxw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "sya": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + 
"level3": "Greater Barito linkage", + "level4": "North West Greater Barito" + }, + "syb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Subanen", + "level4": "Nuclear Subanen", + "level5": "East Nuclear Subanen" + }, + "syc": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic" + }, + "syi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30" + }, + "syk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara" + }, + "syl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Eastern Bengali" + }, + "sym": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + "level4": "Mande Samo" + }, + "syo": { + "level0": "Austroasiatic", + "level1": "Pearic", + "level2": "Western Pearic", + "level3": "Southern Chong" + }, + "sys": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi" + }, + "syw": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate", + "level9": "Yolmo-Kagate" + }, + "syx": { + "level0": 
"Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic" + }, + "syy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "sza": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "szb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok" + }, + "szc": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "szd": { + "level0": "Bookkeeping" + }, + "sze": { + "level0": "Blue Nile Mao", + "level1": "West Mao", + "level2": "Hozo-Seze" + }, + "szg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Mongoic" + }, + "szl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "West Slavic", + "level5": "Lechitic", + "level6": "Polish-Silesian" + }, + "szn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "West Central Maluku", + "level3": "Sula-Buru" + }, + "szp": { + "level0": "Inanwatan" + }, + "szs": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "BSLic", + "level3": "BANZL", + "level4": "Auslanic" + }, + "szv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Dualaic", + "level9": "Kole-Isubu" 
+ }, + "szw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "South Halmahera", + "level6": "Central-Eastern South Halmahera" + }, + "szy": { + "level0": "Austronesian", + "level1": "East Formosan", + "level2": "Central East Formosan" + }, + "taa": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic" + }, + "tab": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Tabasaran-Aghul-Lezgi" + }, + "tac": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran" + }, + "tad": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "West Tariku" + }, + "tae": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Northeast Japura-Colombia", + "level4": "Baniwa-Curripaco-Tariano" + }, + "taf": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV" + }, + "tag": { + "level0": "Rashad" + }, + "tah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Northern Outlier Polynesian-East Polynesian", + "level9": "Solomons Northern Outlier Polynesian-East Polynesian", + "level10": "Central Northern Outlier Polynesian-East Polynesian", + "level11": "East Polynesian", + "level12": "East Polynesian Proximal", + "level13": 
"Southern East Polynesian Proximal", + "level14": "Tahitian-Austral" + }, + "taj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Nuclear Tamang" + }, + "tak": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal", + "level8": "Tala-Zamwar" + }, + "tal": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.3", + "level6": "Goemaic", + "level7": "Talic" + }, + "tam": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Tamil-Paliyan" + }, + "tan": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.2-3", + "level5": "West Chadic A.2", + "level6": "Tangalic", + "level7": "Nuclear Tangalic", + "level8": "Tangale-Kwami-Kupto" + }, + "tao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Batanic", + "level3": "Yami-Itbayat" + }, + "tap": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Sabi", + "level8": "Malungu-Central Sabi" + }, + "taq": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "tar": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": 
"Tarahumaran" + }, + "tas": { + "level0": "Pidgin", + "level1": "French-based pidgin" + }, + "tat": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "North Kipchak", + "level6": "Bashkiric" + }, + "tau": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic", + "level6": "Upper Tananaic" + }, + "tav": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan I", + "level4": "Bara-Tatuyo" + }, + "taw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "Kalam-Kobon", + "level4": "Etp-Ti Kalam" + }, + "tax": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3", + "level5": "Sokoroic", + "level6": "Saba-Sokoro-Tamki" + }, + "tay": { + "level0": "Austronesian", + "level1": "Atayalic" + }, + "taz": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Acheron-Tocho" + }, + "tbb": { + "level0": "Bookkeeping" + }, + "tbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Western Bel" + }, + "tbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "tbf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tabar linkage" + }, + "tbg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "tbh": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Yuin", + "level5": "Northern Costal Yuin" + }, + "tbi": { + "level0": "Eastern Jebel" + }, + "tbj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "tbk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Kalamian" + }, + "tbl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic", + "level3": "Tboli-Blaan" + }, + "tbm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Sere-Bviri", + "level8": "Ndogo-Sere", + "level9": "Tagbu-Sere" + }, + "tbn": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tbo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage" + }, + "tbp": { + 
"level0": "Lakes Plain", + "level1": "East Lakes Plain" + }, + "tbr": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Krongo-Tumtum" + }, + "tbs": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan" + }, + "tbt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "Forest Kivu" + }, + "tbu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan" + }, + "tbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic" + }, + "tbx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "tby": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran" + }, + "tbz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari", + "level11": "Tayari-Ditammari", + "level12": "Ditammaric" + }, + "tca": { + "level0": "Ticuna-Yuri" + }, + "tcb": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tananaic", + "level6": "Upper 
Tananaic" + }, + "tcc": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Tatoga-Omotik", + "level3": "Gemein Datooga", + "level4": "North-Central Datooga" + }, + "tcd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Ka-Togo", + "level4": "Avatime-Nyangbo", + "level5": "Nyangbo-Tafi" + }, + "tce": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tutchone" + }, + "tcf": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa" + }, + "tcg": { + "level0": "Kayagaric", + "level1": "Kaygir-Tamagario" + }, + "tch": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Gullah-Nevis-Antigua", + "level15": "Gullah", + "level16": "Bahamian Gullah" + }, + "tci": { + "level0": "Yam", + "level1": "Morehead-Maro", + "level2": "Tonda", + "level3": "Eastern Tonda" + }, + "tck": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior 
Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)", + "level19": "Tsitsekeic" + }, + "tcl": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Jingpho-Luish", + "level3": "Luish", + "level4": "Unclassified Luish" + }, + "tcn": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Dolpo-Tichurong" + }, + "tco": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic" + }, + "tcp": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Core Central Kuki-Chin" + }, + "tcq": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "tcs": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "tct": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Kam-Sui", + "level3": "Then-MMS" + }, + "tcu": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran" + }, + "tcw": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Northern Totonacan", + "level4": "Necaxan" + }, + "tcx": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + 
"level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda" + }, + "tcy": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Tuluic" + }, + "tcz": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Thadoic" + }, + "tda": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "tdb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Magadhan", + "level10": "Sadanic", + "level11": "Sadri-Panchpargania" + }, + "tdc": { + "level0": "Chocoan", + "level1": "Embera", + "level2": "San Juan", + "level3": "Upper San Juan" + }, + "tdd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic" + }, + "tde": { + "level0": "Dogon", + "level1": "West Dogon" + }, + "tdf": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Triengic" + }, + "tdg": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Nuclear Tamang" + }, + "tdh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "tdi": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "Interior Bungku-Tolaki" + }, + "tdj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "tdk": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic A", + "level4": "West Chadic A.4", + "level5": "Fyer-Tambas" + }, + "tdl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall" + }, + "tdn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "tdo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme" + }, + "tdq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Unclassified Benue-Congo" + }, + "tdr": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Hre-Sedang-Todrah-Monam" + }, + "tds": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "tdt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A", + "level5": "Tetunic" + }, + "tdv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic" + }, + "tdx": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic" + }, + "tdy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Mangyan" + }, + "tea": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "Senoic", + "level4": "Lanoh-Semnam-Temiar" + }, + "teb": { + "level0": "Bookkeeping" + }, + "tec": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin", + "level5": "Western Plateau Central Kalenjin" + }, + "ted": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Grebo-Aizi", + "level4": "Grebo", + "level5": "Ivorian Grebo", + "level6": "Tepo-Plapo" + }, + "tee": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tef": { + "level0": "Austroasiatic", + "level1": "Nicobaric", + "level2": "Nuclear Nicobaric", + "level3": "Chowra-Teressa" + }, + "teg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Mbere (B.60)" + }, + "teh": { + "level0": "Chonan", + "level1": "Continental Chonan" + }, + "tei": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "tek": { + "level0": "Atlantic-Congo", + 
"level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie" + }, + "tel": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "tem": { + "level0": "Atlantic-Congo", + "level1": "Mel", + "level2": "Northern Mel" + }, + "ten": { + "level0": "Tucanoan", + "level1": "Western Tucanoan", + "level2": "Koreguaje-Tama" + }, + "teo": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana" + }, + "tep": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "teq": { + "level0": "Temeinic" + }, + "ter": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan" + }, + "tes": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Javanesic", + "level3": "Modern Javanese" + }, + "tet": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Central Timoric A", + "level5": "Tetunic" + }, + "teu": { + "level0": "Kuliak", + "level1": "Ngangea-So" + }, + "tev": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Teor-Kur" + }, + "tew": { + "level0": "Kiowa-Tanoan", + "level1": "Tewa" + }, + "tex": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southwest Surmic", + "level3": "Didinga-Murle" + }, + "tey": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo" + }, + "tez": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Western 
Berber" + }, + "tfi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Eastern Phla-Phera" + }, + "tfn": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Southern Alaskan Athabaskan" + }, + "tfo": { + "level0": "Geelvink Bay" + }, + "tfr": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Isthmic Chibchan", + "level3": "Western Isthmic Chibchan" + }, + "tft": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Ternatean" + }, + "tga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Taita-Sagalla" + }, + "tgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Dusunic" + }, + "tgc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "Tungak-Nalik" + }, + "tgd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2" + }, + "tge": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic" + }, + "tgf": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic" + }, + "tgg": { 
+ "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage" + }, + "tgh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Vincent-Grenadian Creole", + "level15": "Grenada-Tobago Creole" + }, + "tgi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Banoni-Piva" + }, + "tgj": { + "level0": "Sino-Tibetan", + "level1": "Macro-Tani", + "level2": "Tani", + "level3": "Pre-Western Tani", + "level4": "Western Tani", + "level5": "Subansiri", + "level6": "Bangni-Tagin" + }, + "tgk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Farsic", + "level9": "Eastern Farsic", + "level10": "Tajikic" + }, + "tgl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Tagalogic", + "level5": "Tagalog-Filipino" + }, + "tgn": 
{ + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Surigao" + }, + "tgo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Nimoa-Sudest" + }, + "tgp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "South Santo", + "level9": "Araki-Tangoa" + }, + "tgq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Dayic" + }, + "tgr": { + "level0": "Bookkeeping" + }, + "tgs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "tgt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Palawanic", + "level4": "Northern Palawanic", + "level5": "Batak-Central Tagbanwa" + }, + "tgu": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Ataitan", + "level3": "Tangu-Igom" + }, + "tgv": { + "level0": "Bookkeeping" + }, + "tgw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "South Senufo", + "level5": "Tagbana-Jimini" + }, + "tgx": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + 
"level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "tgy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Sereic", + "level6": "Sere-Indri", + "level7": "Indri-Togoyo" + }, + "tgz": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Central Alaya-Athima" + }, + "tha": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai" + }, + "thc": { + "level0": "Bookkeeping" + }, + "thd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Upper Southwest Paman" + }, + "the": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "thf": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Newaric", + "level4": "Thangmi-Baram" + }, + "thh": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", + "level3": "Tarahumaran", + "level4": "Unclassified Tarahumaran" + }, + "thk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Central Kenya Bantu", + "level9": "Eastern Kirinyaga", + "level10": "Northern Kirinyaga" + }, + "thl": { + "level0": "Indo-European", + "level1": 
"Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu", + "level11": "Dangaura-Khuna-Sonaha" + }, + "thm": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Chutic" + }, + "thn": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "thp": { + "level0": "Salishan", + "level1": "Interior Salish", + "level2": "Northern Interior Salish", + "level3": "Thompsonic" + }, + "thq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "thr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic" + }, + "ths": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Kaike-Ghale-Tamangic", + "level3": "Ghale-Tamangic", + "level4": "Tamangic", + "level5": "Gurungic", + "level6": "Thakali-Chantyal" + }, + "tht": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Cordillera Athabaskan", + "level5": "Nahanni" + }, + "thu": { + "level0": "Nilotic", + 
"level1": "Western Nilotic", + "level2": "Lwoo", + "level3": "Northern Lwoo", + "level4": "Luwo-Thuri" + }, + "thv": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg" + }, + "thw": { + "level0": "Bookkeeping" + }, + "thx": { + "level0": "Bookkeeping" + }, + "thy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bikwin-Jen", + "level5": "Southern Bikwin-Jen", + "level6": "Jen" + }, + "thz": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "tia": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber" + }, + "tic": { + "level0": "Heibanic", + "level1": "West-Central Heibanic", + "level2": "Western Heibanic" + }, + "tie": { + "level0": "Bookkeeping" + }, + "tif": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic" + }, + "tig": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "Tigre-Dahalik" + }, + "tih": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic", + "level6": "Murutic", + "level7": "Northern Murutic", + "level8": "Lowland Murut" + }, + "tii": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + 
"level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Kwa-Kasai North", + "level15": "Sakata-Tiinic", + "level16": "Tiinic" + }, + "tij": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Thulung-Tilung-Koyi" + }, + "tik": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid" + }, + "til": { + "level0": "Salishan", + "level1": "Coast Salish" + }, + "tim": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Huon", + "level3": "Western Huon", + "level4": "Cromwell", + "level5": "Kabwum" + }, + "tin": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Avar-Andic-Tsezic", + "level3": "Andic", + "level4": "Bagvalal-Tindi" + }, + "tio": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "tip": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "tiq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tiefoic" + }, + "tir": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic" + }, + "tis": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Kalinga-Itneg", + "level7": 
"Kalinga", + "level8": "Masadiit" + }, + "tiu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic" + }, + "tiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": "Tiv-Iyive-Otanga" + }, + "tix": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro", + "level2": "Tiwa" + }, + "tiy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bilic" + }, + "tiz": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Southern Shanic", + "level11": "Wuding-Yuanyang Tai" + }, + "tja": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Bassa-Klao", + "level5": "Klao-Tajuasohn" + }, + "tjg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Barito-Mahakam" + }, + "tji": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Tujia" + }, + "tjj": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Albatross Bay", + "level4": "Anguthimri-Yangathimri-Yuputhimri", + "level5": "Anguthimri-Yangathimri" + }, + "tjl": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern 
Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Mogaung" + }, + "tjn": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Jogo-Jeri", + "level6": "Jogo" + }, + "tjo": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Greater Zenatic", + "level3": "Zenatic", + "level4": "Northern Saharan Oasis Berber", + "level5": "Ouargli-Oued Righ" + }, + "tjp": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Unclassified Wati" + }, + "tjs": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Tujia" + }, + "tju": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda" + }, + "tka": { + "level0": "Unattested" + }, + "tkb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Unclassified Tharu" + }, + "tkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor", + "level3": "Kemak-Tukudede" + }, + "tke": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic" + }, + "tkf": { + "level0": "Unattested", + "level1": "Tupian (Unattested)" + }, + "tkg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South 
West-Central Malagasic" + }, + "tkk": { + "level0": "Bookkeeping" + }, + "tkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean", + "level9": "Pukapukic", + "level10": "Samoan-Tokelauan" + }, + "tkn": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami", + "level4": "Nuclear Amami", + "level5": "Okinoerabu-Tokunoshima" + }, + "tkp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian" + }, + "tkq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Ogonoid", + "level5": "East Ogonoid", + "level6": "Tai-Kana" + }, + "tkr": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Western Samur" + }, + "tks": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Ramand-Karaj" + }, + "tkt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Bihari", + "level9": "Tharuic", + "level10": "Eastern Tharu" + }, + "tku": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central 
Totonacan", + "level3": "Northern Totonacan", + "level4": "Necaxan" + }, + "tkv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Korap linkage" + }, + "tkw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "tkx": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Tangko-Nakai" + }, + "tkz": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric" + }, + "tla": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tepiman", + "level3": "Tepehuan", + "level4": "Southern Tepehuan" + }, + "tlb": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Tobelo-Tugutil" + }, + "tlc": { + "level0": "Totonacan", + "level1": "Totonac" + }, + "tld": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Sangiric", + "level3": "Northern Sangiric" + }, + "tle": { + "level0": "Bookkeeping" + }, + "tlf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol" + }, + "tlg": { + "level0": "Namla-Tofanma" + }, + "tlh": { + "level0": "Artificial Language" + }, + "tli": { + "level0": "Athabaskan-Eyak-Tlingit" + }, + "tlj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": 
"East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara" + }, + "tlk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "tll": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Tetelaic" + }, + "tlm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "tln": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Torajic" + }, + "tlo": { + "level0": "Narrow Talodi", + "level1": "Buram-Saraf", + "level2": "Nding-Tasomi" + }, + "tlp": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan" + }, + "tlq": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic", + "level5": "Southern Angkuic" + }, + "tlr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southeast Solomonic", + "level5": "Guadalcanal-Nggelic", + "level6": "Southeast Guadalcanal" + }, + "tls": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Southeast Santo" + }, + "tlt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Eastern Littoral Piru Bay" + }, + "tlu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Piru Bay", + "level4": "East Piru Bay", + "level5": "Solehua", + "level6": "Seram Straits", + "level7": "Ambonic", + "level8": "Northeast Ambon" + }, + "tlv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Taliaboic" + }, + "tlw": { + "level0": "Bookkeeping" + }, + "tlx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "West Manus", + "level8": "West Manus II", + "level9": "Likum-Levei" + }, + "tly": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "tlz": { + "level0": "Bookkeeping" + }, + "tma": { + "level0": "Tamaic", + "level1": "Tama-Sungor-Miisiirii", + "level2": "Tama-Sungor" + }, + "tmb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": 
"Malakula", + "level7": "Western Malakula linkage", + "level8": "Peripheral Western Malakula", + "level9": "Southwestern Malakula" + }, + "tmc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.1", + "level5": "Sumrayic", + "level6": "Ndam-Tumak" + }, + "tmd": { + "level0": "Piawi" + }, + "tme": { + "level0": "Unattested" + }, + "tmf": { + "level0": "Lengua-Mascoy", + "level1": "Eastern Enlhet-Enenlhet" + }, + "tmg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Castilic", + "level13": "South Castilic", + "level14": "Ternate-Zamboanga-Cavite" + }, + "tmi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "Nuclear Santo", + "level8": "East Santo", + "level9": "Mafea-Tutuba" + }, + "tmj": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Samarokena-Airoran" + }, + "tmk": { + "level0": "Bookkeeping" + }, + "tml": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "tmm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "tmn": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Tamanic-Bugis", + "level5": "Tamanic" + }, + "tmo": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "South Aslian", + "level3": "Semelai-Semaq" + }, + "tmq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Ali-Tumleo" + }, + "tmr": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic" + }, + "tms": { + "level0": "Katla-Tima" + }, + "tmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "tmu": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "Central Tariku" + }, + "tmv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Ngiri", + "level10": "Ngiri Riverain Mongala", + "level11": "Motemboic" + }, + "tmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + 
"tmx": { + "level0": "Bookkeeping" + }, + "tmy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage" + }, + "tmz": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku" + }, + "tna": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik" + }, + "tnb": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tnc": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "South Eastern Tucanoan" + }, + "tnd": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tnf": { + "level0": "Bookkeeping" + }, + "tng": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic A", + "level4": "East Chadic A.2", + "level5": "East Chadic A.2 2" + }, + "tnh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kaukombaran" + }, + "tni": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "tnj": { + "level0": "Bookkeeping" + }, + "tnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Southern Tanna" + }, + "tnl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + 
"level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage" + }, + "tnm": { + "level0": "Sentanic", + "level1": "Nuclear Sentanic" + }, + "tnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage", + "level8": "Whitesands-North Tanna linkage" + }, + "tno": { + "level0": "Pano-Tacanan", + "level1": "Tacanan", + "level2": "Takanik-Chamik", + "level3": "Takanik", + "level4": "Araona-Toromono" + }, + "tnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Tanna", + "level7": "Northern Tanna linkage", + "level8": "Whitesands-North Tanna linkage" + }, + "tnq": { + "level0": "Arawakan", + "level1": "Caribbean Arawakan", + "level2": "Antillean Arawakan" + }, + "tnr": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Tenda", + "level3": "Bassari-Bedik-Bapen", + "level4": "Bedik-Bapen" + }, + "tns": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "St. 
Matthias" + }, + "tnt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan" + }, + "tnu": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P" + }, + "tnv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Eastern zone", + "level7": "Oriya-Gauda-Kamrupa", + "level8": "Gauda-Kamrupa", + "level9": "Gauda-Banga", + "level10": "Southeastern Bengali" + }, + "tnw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan" + }, + "tnx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "tny": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Unclassified Northeast Savanna Bantu", + "level9": "Bende-Tongwe" + }, + "tnz": { + "level0": "Austroasiatic", + "level1": "Aslian", + "level2": "Central-Northern Aslian", + "level3": "North Aslian", + "level4": "Maniq-Menraq-Batek", + "level5": "Maniqic" + }, + "tob": { + "level0": "Guaicuruan", + "level1": "Guaicuru del Sur", + "level2": "Qom", + "level3": "Pilaga-Toba" + }, + "toc": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan", + "level4": "Sierra Totonacan" + }, + "tod": { + "level0": "Mande", + "level1": "Western Mande", + "level2": 
"Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Loma" + }, + "toe": { + "level0": "Bookkeeping" + }, + "tof": { + "level0": "Eastern Trans-Fly" + }, + "tog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Tumbukic" + }, + "toh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Chopi (S.60)" + }, + "toi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Greater Eastern Botatwe", + "level9": "Central Eastern Botatwe", + "level10": "Kafue" + }, + "toj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Kanjobalan-Chujean", + "level4": "Chujean" + }, + "tok": { + "level0": "Artificial Language" + }, + "tol": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan" + }, + "tom": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "ton": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Tongic" + }, 
+ "too": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Northern Totonacan" + }, + "top": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan" + }, + "toq": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "tor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic", + "level8": "Central Core Bandaic" + }, + "tos": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan", + "level3": "Lowland-Sierra Totonacan", + "level4": "Sierra Totonacan" + }, + "tot": { + "level0": "Bookkeeping" + }, + "tou": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Cuoi" + }, + "tov": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Taromic" + }, + "tow": { + "level0": "Kiowa-Tanoan" + }, + "tox": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic", + "level10": "Sonsorol-Tobi" + }, + "toy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio" + }, + "toz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": 
"Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Unclassified Mbum" + }, + "tpa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota", + "level12": "Taupota-Waiema" + }, + "tpc": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa" + }, + "tpe": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "tpf": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Kaptiau-Tarpia" + }, + "tpg": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "East Alor" + }, + "tpi": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Pacific Creole English", + "level12": "Early Melanesian Pidgin" + }, + "tpj": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I", + "level7": "Tupi-Guarani Subgroup 
I.B", + "level8": "Chiriguanic" + }, + "tpl": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa", + "level6": "West-Central Mephaa" + }, + "tpm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": "Sisaala-Chakali", + "level10": "Chakalic", + "level11": "Chakali-Tamprusi-Vagala", + "level12": "Chakali-Tamprusi" + }, + "tpn": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III" + }, + "tpo": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai", + "level11": "Tai Muong" + }, + "tpp": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tpr": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Wayoro-Tupari" + }, + "tpt": { + "level0": "Totonacan", + "level1": "Tepehua" + }, + "tpu": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "West Bahnaric", + "level3": "Tampuon-Bahnar" + }, + "tpv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic", + 
"level10": "Central Trukic", + "level11": "Satawalese-Carolinian", + "level12": "Macro-Carolinian", + "level13": "Murilo-Fanapanges" + }, + "tpx": { + "level0": "Otomanguean", + "level1": "Western Otomanguean", + "level2": "Tlapanec-Manguean", + "level3": "Subtiaba-Tlapanec", + "level4": "Mephaa", + "level5": "North-Central Mephaa", + "level6": "West-Central Mephaa" + }, + "tpz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "North Bougainville Oceanic", + "level10": "Nuclear North Bougainville Oceanic", + "level11": "Buka", + "level12": "Saposa-Tinputz", + "level13": "Tinputzic" + }, + "tqb": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.B" + }, + "tql": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "tqm": { + "level0": "Doso-Turumsa" + }, + "tqn": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Southern Sahaptin" + }, + "tqo": { + "level0": "Eleman", + "level1": "Eastern Eleman" + }, + "tqp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage" + }, + "tqq": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": 
"Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana", + "level8": "Dabarre-Tunni" + }, + "tqr": { + "level0": "Narrow Talodi", + "level1": "Lumun-Torona" + }, + "tqt": { + "level0": "Totonacan", + "level1": "Totonac", + "level2": "Central Totonacan" + }, + "tra": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Unclassified Kohistani" + }, + "trb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Kairiruic linkage", + "level9": "Kaiep-Terebu" + }, + "trc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trd": { + "level0": "Austroasiatic", + "level1": "Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric" + }, + "tre": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru" + }, + "trf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius", + "level15": "Barbados-Trinidad" + }, + "trg": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": 
"West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + "level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "North-Eastern Neo-Aramaic", + "level11": "Trans-Zab" + }, + "trh": { + "level0": "Dagan" + }, + "tri": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Taranoan", + "level3": "Tiriyoan" + }, + "trj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Dangla-Mabire-Birgit", + "level6": "Birgit-Mogum-Toram" + }, + "trl": { + "level0": "Unclassifiable" + }, + "trm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani", + "level4": "Nuristani Kalasha-Tregami" + }, + "trn": { + "level0": "Arawakan", + "level1": "Southern Maipuran", + "level2": "Bolivian Arawakan", + "level3": "Mojeno-Paunaca", + "level4": "Moje\u00f1o" + }, + "tro": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Northwestern Kuki-Chin", + "level4": "Kolhrengic", + "level5": "Tarao-Chothe" + }, + "trp": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "trq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trs": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Trique" + }, + "trt": { + "level0": "Geelvink Bay", + "level1": "Burate-Wate" + }, + "tru": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Aramaic", + 
"level6": "Imperial-Middle-Modern Aramaic", + "level7": "Middle-Modern Aramaic", + "level8": "Eastern Aramaic", + "level9": "Central Eastern Aramaic", + "level10": "Turoyo-Mlahso" + }, + "trv": { + "level0": "Austronesian", + "level1": "Atayalic" + }, + "trw": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Kohistani", + "level8": "Dir-Swat Kohistani" + }, + "trx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Bidayuh" + }, + "try": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Unclassified Sukaphic" + }, + "trz": { + "level0": "Chapacuran", + "level1": "Moreic-Waric", + "level2": "Moreic" + }, + "tsa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi A" + }, + "tsb": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland 
East Cushitic", + "level5": "Transversal Lowland East Cushitic", + "level6": "Dullay" + }, + "tsc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)" + }, + "tsd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "North Greek" + }, + "tse": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Italian Sign" + }, + "tsf": { + "level0": "Bookkeeping" + }, + "tsg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central Philippine", + "level4": "Bisayan", + "level5": "South Bisayan", + "level6": "Butuan-Tausug" + }, + "tsh": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Sharwa-Tsuvan" + }, + "tsi": { + "level0": "Tsimshian" + }, + "tsj": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Tshanglic" + }, + "tsk": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Kham-Hor" + }, + "tsl": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "tsm": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tsn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": 
"Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana", + "level12": "Central Sotho-Tswana" + }, + "tso": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Tsonga-Copi", + "level12": "Tswa-Ronga (S.50)", + "level13": "Tsongan" + }, + "tsp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tusia" + }, + "tsq": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "tsr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Southwest Santo" + }, + "tss": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "JSLic" + }, + "tst": { + "level0": "Songhay", + "level1": "Eastern Songhay" + }, + "tsu": { + "level0": "Austronesian", + "level1": "Tsouic" + }, + "tsv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "B10-B30", + "level8": "Okani (B.30)", + "level9": "Southern Okani" + }, + "tsw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": 
"Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "tsx": { + "level0": "Anim", + "level1": "Inland Gulf of Papua", + "level2": "West Inland Gulf of Papua" + }, + "tsy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tsz": { + "level0": "Tarascan" + }, + "tta": { + "level0": "Siouan", + "level1": "Ohio Valley Siouan" + }, + "ttb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Dakoid", + "level6": "Tiba-Dong" + }, + "ttc": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Mamean", + "level4": "Mamean" + }, + "tte": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "ttf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)" + }, + "ttg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Lower Baram", + "level6": "Central Lower Baram A" + }, + "tth": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "tti": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Jayapura Bay", + "level8": "Eastern Jayapura 
Bay" + }, + "ttj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "Rutara", + "level11": "North Rutara", + "level12": "Nkore-Kiga-Nyoro-Tooro", + "level13": "Nyoro-Tooro" + }, + "ttk": { + "level0": "Barbacoan", + "level1": "Coconucan" + }, + "ttl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Botatwe", + "level8": "Western Botatwe", + "level9": "Machili" + }, + "ttm": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Central Alaska-Yukon Athabaskan", + "level4": "Tanana-Tutchone", + "level5": "Tutchone" + }, + "ttn": { + "level0": "Pauwasi", + "level1": "Western Pauwasi" + }, + "tto": { + "level0": "Austroasiatic", + "level1": "Katuic", + "level2": "Ta'oihic", + "level3": "Ong-Ta'oih" + }, + "ttp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Pamona-Tombelala" + }, + "ttq": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Tuareg", + "level3": "Southern Tuareg" + }, + "ttr": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Teraic", + "level5": "Western Tera" + }, + "tts": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai", + "level10": 
"Sakon Nakhon" + }, + "ttt": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian", + "level7": "Farsic-Caucasian Tat", + "level8": "Caucasian Tat" + }, + "ttu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "ttv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Koro-Lele-Nali-Titan" + }, + "ttw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah", + "level6": "Western Lowland Kenyah-Penan" + }, + "ttx": { + "level0": "Bookkeeping" + }, + "tty": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku" + }, + "ttz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Kyirong-Kagate" + }, + "tua": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Mandi-Muniwara" + }, + "tub": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan" + }, + "tuc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Ngero", + "level8": "Western Ngero", + "level9": "Tuam" + }, + "tue": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Tuyuca-Yuruti" + }, + "tuf": { + "level0": "Chibchan", + "level1": "Core Chibchan", + "level2": "Magdalenic", + "level3": "Southern Magdalenic", + "level4": "Tunebo" + }, + "tug": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Riverine Bua" + }, + "tuh": { + "level0": "Taulil-Butam" + }, + "tui": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Northern Mbum", + "level6": "Tupuri-Mundang-Mambai" + }, + "tuj": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Mainland North Halmaheran", + "level3": "Tobelo-Tugutil" + }, + "tuk": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "East Oghuz" + }, + "tul": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja", + "level7": "Tulaic", + "level8": "Tula-Ma-Yebu", + "level9": "Nuclear Tulaic" + }, + "tum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Tumbuka-Sena-Nyanja", + "level8": "Tumbukic" + }, + "tuo": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern 
Tucanoan", + "level3": "Eastern Eastern Tucanoan I" + }, + "tuq": { + "level0": "Saharan", + "level1": "Western Saharan", + "level2": "Tebu" + }, + "tur": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Oghuz", + "level3": "Nuclear Oghuz", + "level4": "West Oghuz" + }, + "tus": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian", + "level2": "Tuscarora-Nottoway" + }, + "tuu": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan", + "level5": "Rogue River" + }, + "tuv": { + "level0": "Nilotic", + "level1": "Eastern Nilotic", + "level2": "Teso-Lotuxo-Maa", + "level3": "Teso-Turkana", + "level4": "Turkanic" + }, + "tux": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa" + }, + "tuy": { + "level0": "Nilotic", + "level1": "Southern Nilotic", + "level2": "Kalenjin", + "level3": "Central Kalenjin", + "level4": "Plateau Central Kalenjin" + }, + "tuz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Kirma-Tyurama" + }, + "tva": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "West Choiseul" + }, + "tvd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Kambari-Cicipu", + "level6": "Kambaric", + "level7": "East Kambaric" + }, + "tve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": 
"Southwest Maluku", + "level4": "Teun-Nila-Serua" + }, + "tvi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "tvk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Ambrym" + }, + "tvl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Ellicean" + }, + "tvm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Southwest Babar" + }, + "tvn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Burmish", + "level4": "Southern Burmish", + "level5": "Mranmaic", + "level6": "Nuclear Mranmaic" + }, + "tvo": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Ternatean" + }, + "tvs": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Pare-Taveta" + }, + "tvt": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "North Patkaian", + "level4": "Noctean", + "level5": "Tutsic" + }, + "tvu": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + 
"level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)", + "level11": "Mandi-Nyokon" + }, + "tvw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili" + }, + "tvy": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Luso-Asian Creole" + }, + "twa": { + "level0": "Salishan", + "level1": "Coast Salish", + "level2": "Central Salish" + }, + "twb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "South Mangyan", + "level4": "Buhid-Taubuid", + "level5": "Batangan" + }, + "twc": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.1", + "level5": "Ngizim-Southwestern Bade", + "level6": "Shira-Southwestern Bade", + "level7": "Shira" + }, + "twe": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Pantar", + "level5": "Teiwa-Sar" + }, + "twf": { + "level0": "Kiowa-Tanoan", + "level1": "Tiwa-Piro", + "level2": "Tiwa", + "level3": "Taos-Picuris" + }, + "twg": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "West Alor-Straits-Pantar", + "level4": "Kaera-Straits", + "level5": "Blagaric" + }, + "twh": 
{ + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "White Tai" + }, + "twl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona", + "level11": "Central Shona" + }, + "twm": { + "level0": "Bookkeeping" + }, + "twn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Eastern Mambila" + }, + "two": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Northern Sotho", + "level12": "Sepedic" + }, + "twp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Eastern Admiralty Islands", + "level6": "Manus", + "level7": "East Manus", + "level8": "Kurti-Kele-Ere" + }, + "twq": { + "level0": "Songhay", + "level1": "Northwest Songhay", + "level2": "Northern Songhay" + }, + "twr": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio", 
+ "level3": "Tarahumaran", + "level4": "Unclassified Tarahumaran" + }, + "twt": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup IV", + "level6": "Tupi-Guarani Subgroup IV.B" + }, + "twu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote", + "level5": "Central East Rote" + }, + "tww": { + "level0": "Walioic" + }, + "twx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Shona (S.10)", + "level9": "Core Shona", + "level10": "Plateau Shona" + }, + "twy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "North East Greater Barito" + }, + "txa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Dusunic", + "level6": "Paitanic" + }, + "txb": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Tokharian" + }, + "txc": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan" + }, + "txe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tolitoli" + }, + "txg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Gyalrongic", + "level5": "West Gyalrongic", + "level6": "Horpa" + }, + "txh": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "txi": { + "level0": "Cariban", + "level1": "Pekodian", + "level2": "Xinguan" + }, + "txj": { + "level0": 
"Saharan", + "level1": "Western Saharan", + "level2": "Kanuri-Kanembu", + "level3": "Kanembuic" + }, + "txm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Northern Tomini" + }, + "txn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru" + }, + "txo": { + "level0": "Sino-Tibetan", + "level1": "Dhimal-Lhokpu-Toto" + }, + "txq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Rote-Meto", + "level4": "Nuclear Rote" + }, + "txr": { + "level0": "Unclassifiable" + }, + "txs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Minahasan", + "level3": "North Minahasan", + "level4": "Northeast Minahasan" + }, + "txt": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro", + "level3": "Asmat", + "level4": "Citak Asmat" + }, + "txu": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je" + }, + "txx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Sabahan", + "level4": "Southwest Sabahan", + "level5": "Greater Murutic" + }, + "txy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "Southwestern Malagasic", + "level7": "South West-Central Malagasic", + "level8": "Nuclear South West-Central Malagasic", + "level9": "Inland-Western Malagasic", + "level10": "Bara-Tanosy" + }, + "tya": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Unclassified Rai Coast" + }, + "tye": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Bisa-Busa", + "level3": "Samo-Busa", + 
"level4": "Busan", + "level5": "Kyenga-Shanga" + }, + "tyh": { + "level0": "Austroasiatic", + "level1": "Khmuic", + "level2": "Phay-Pram", + "level3": "Pramic" + }, + "tyi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie" + }, + "tyj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai", + "level11": "Tai Muong" + }, + "tyl": { + "level0": "Bookkeeping" + }, + "tyn": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Ndeiram" + }, + "typ": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Alaya-Athima", + "level3": "Thaypanic" + }, + "tyr": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai" + }, + "tys": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai" + }, + "tyt": { + "level0": "Tai-Kadai", + 
"level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Chiang Saeng", + "level10": "Red Tai" + }, + "tyu": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "Ost-Kxoe", + "level4": "Tshwa Khoe" + }, + "tyv": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "South Siberian Turkic", + "level3": "Sayan-Yenisei Turkic", + "level4": "Sayan" + }, + "tyx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie" + }, + "tyy": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Bijimic-Sur-Shall", + "level6": "Kwangic" + }, + "tyz": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "tza": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "tzb": { + "level0": "Bookkeeping" + }, + "tzc": { + "level0": "Bookkeeping" + }, + "tze": { + "level0": "Bookkeeping" + }, + "tzh": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Tzeltalan" + }, + "tzj": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean", + "level4": "Core Quichean", + 
"level5": "Cakchiquel-Tzutujil" + }, + "tzl": { + "level0": "Artificial Language" + }, + "tzm": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Kabyle-Atlas Berber", + "level3": "Atlas Berber" + }, + "tzn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai" + }, + "tzo": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Western Mayan", + "level3": "Cholan-Tzeltalan", + "level4": "Tzeltalan" + }, + "tzs": { + "level0": "Bookkeeping" + }, + "tzt": { + "level0": "Bookkeeping" + }, + "tzu": { + "level0": "Bookkeeping" + }, + "tzx": { + "level0": "Lower Sepik", + "level1": "Karawarian" + }, + "tzz": { + "level0": "Bookkeeping" + }, + "uam": { + "level0": "Unclassifiable" + }, + "uan": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P" + }, + "uar": { + "level0": "Eleman", + "level1": "Eastern Eleman" + }, + "uba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "ubi": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.3" + }, + "ubm": { + "level0": "Bookkeeping" + }, + "ubr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "ubu": { + 
"level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Hagen", + "level3": "Aua-Gawil" + }, + "uby": { + "level0": "Abkhaz-Adyge" + }, + "uda": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "West Lower Cross", + "level7": "Oroic", + "level8": "Enwang-Uda" + }, + "ude": { + "level0": "Tungusic", + "level1": "Northeastern Tungusic", + "level2": "Central-Eastern Tungusic", + "level3": "Oroch-Udihe" + }, + "udg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Irula-Muduga", + "level8": "Muduga-Palu" + }, + "udi": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Udi-Aghwan" + }, + "udj": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Ujir-Kola-Kompane" + }, + "udl": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Margi-Mandara-Mofu", + "level5": "Mofuic", + "level6": "Tokombere", + "level7": "Madaic" + }, + "udm": { + "level0": "Uralic", + "level1": "Permian" + }, + "udu": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Komo-Uduk" + }, + "ues": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic", + "level10": "Western Munic" + }, + "ufi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Gusap-Mot", + 
"level4": "Ufim-Rawa-Nahu" + }, + "uga": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician" + }, + "ugb": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "uge": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "New Georgia", + "level10": "East New Georgia" + }, + "ugh": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic" + }, + "ugn": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "ugo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese" + }, + "ugy": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "uha": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse" + }, + "uig": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uyghuric" + }, + "uis": { + "level0": "South Bougainville", + "level1": "Buinic", + "level2": "Buin" + }, + "uiv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + "level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": 
"Tiv-Iyive-Otanga" + }, + "uji": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Jilic-Eggonic", + "level5": "Jilic" + }, + "uka": { + "level0": "South Bird's Head Family" + }, + "ukg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "ukh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Makaa-Kako (A.80-90)", + "level8": "Makaa-Njem (A.80)", + "level9": "Mpoic", + "level10": "Mpiemo-Ukhwejo" + }, + "ukl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "ukp": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Bendic", + "level6": "Nuclear Bendic", + "level7": "Bukpic" + }, + "ukq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross", + "level6": "Central Lower Cross", + "level7": "Efikic" + }, + "ukr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Balto-Slavic", + "level3": "Slavic", + "level4": "East Slavic", + "level5": "Ukrainian-Rusyn" + }, + "uks": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "uku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Northwestern Edoid", + "level6": "Osse", + "level7": "Ukue-Ehueun" + }, + "ukv": { + "level0": 
"Nilotic", + "level1": "Eastern Nilotic", + "level2": "Barian", + "level3": "Nuclear Barian" + }, + "ukw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Igboid", + "level4": "Nuclear Igboid", + "level5": "Central-Northern Igbo" + }, + "uky": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest Pama", + "level3": "Upper Southwest Paman" + }, + "ula": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Shiroro-Kamuku", + "level6": "Shiroro" + }, + "ulb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba" + }, + "ulc": { + "level0": "Tungusic", + "level1": "Central-Western Tungusic", + "level2": "Ulchaic" + }, + "uli": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Western Trukic" + }, + "ulk": { + "level0": "Eastern Trans-Fly" + }, + "ull": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "ulm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Pitu Ulunna Salu", + "level6": "Matangnga-Aralle-Tabulahan" + }, + "uln": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Global German" + }, + "ulu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah", + "level6": "Upper Pujungan" + }, + "ulw": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Sumuic" + }, + "uly": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic" + }, + "uma": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Southern Sahaptin" + }, + "umb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene" + }, + "umd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umbindhamuic" + }, + "umg": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic", + "level3": "Coastal Lamalamic" + }, + "umi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Punan", + "level6": "Bukat-Ukit-Beketan-Lugat-Lisum" + }, + "umm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central 
Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "umn": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Patkaian", + "level3": "South Patkaian", + "level4": "Southeastern Patkaian" + }, + "umo": { + "level0": "Bororoan" + }, + "ump": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Northeastern Pama", + "level4": "Umpilaic" + }, + "ums": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Tominic", + "level5": "Southern Tomini" + }, + "umu": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Common Delaware" + }, + "una": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Watut" + }, + "une": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "North-Central Edoid", + "level6": "Afenmai-Bendel", + "level7": "Uneme-Yekhee" + }, + "ung": { + "level0": "Worrorran" + }, + "uni": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Barupu Lagoon" + }, + "unk": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xaray", + "level4": "Parecis-Nawe" + }, + "unm": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Common Delaware" + }, + "unn": { + "level0": "Pama-Nyungan", + "level1": "Ganaic" + }, + "unr": { + "level0": "Austroasiatic", + "level1": 
"Mundaic", + "level2": "North Munda", + "level3": "Kherwarian", + "level4": "Mundaric", + "level5": "Ho-Mundari" + }, + "unu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "unx": { + "level0": "Bookkeeping" + }, + "unz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Northern Kaili-Wolio", + "level5": "Greater Kaili", + "level6": "Common Kaili" + }, + "uok": { + "level0": "Bookkeeping" + }, + "uon": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "upi": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic", + "level3": "Waina-Punda" + }, + "upv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage" + }, + "urb": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VIII", + "level6": "Guaja-Kaapor-Ava" + }, + "urc": { + "level0": "Giimbiyu", + "level1": "Urninganggic" + }, + "urd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Shaurasenic", + "level8": "Indo-Aryan Central zone", + "level9": "Western Hindi", + "level10": "Hindustani" + }, + "ure": { + "level0": "Uru-Chipaya" + }, + "urf": { + "level0": "Bookkeeping" + }, + "urg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + 
"level2": "Rai Coast", + "level3": "Peka", + "level4": "Urigina-Danaru" + }, + "urh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Akpes-Edoid", + "level4": "Edoid", + "level5": "Southwestern Edoid" + }, + "uri": { + "level0": "Nuclear Torricelli" + }, + "urk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric", + "level6": "Northeastern Peninsular Malay" + }, + "url": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu" + }, + "urm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Mountain Ok", + "level6": "Division A Mountain Ok", + "level7": "Tifal-Telefol", + "level8": "Tifalic" + }, + "urn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Tanimbar-Bomberai", + "level3": "Nuclear Tanimbar-Bomberai", + "level4": "Yamdena-Onin", + "level5": "Oninic" + }, + "uro": { + "level0": "Baining" + }, + "urp": { + "level0": "Unclassifiable" + }, + "urr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "urt": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat" + }, + "uru": { + "level0": "Tupian", + "level1": "Purubora-Ramarama", + "level2": "Ramarama" + }, + "urv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New 
Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Mono-Uruavan" + }, + "urw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Peka" + }, + "urx": { + "level0": "Nuclear Torricelli", + "level1": "Marienberg", + "level2": "Elepi-Kamasau-Marienberg" + }, + "ury": { + "level0": "Tor-Orya" + }, + "urz": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Nuclear Kawahiva", + "level8": "Central Kawahiva", + "level9": "Amondava-Uru-Eu-Wau-Wau" + }, + "usa": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Auyana" + }, + "ush": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Kohistanic Shina" + }, + "usi": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Dimasa-Kokborok", + "level5": "Tipperic" + }, + "usk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Lower Cross", + "level5": "Nuclear Lower Cross" + }, + "usp": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Quichean-Mamean", + "level3": "Greater Quichean" + }, + "usu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru", + "level4": "Erimaic" + }, + "uta": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Tivoid", + 
"level6": "Central Tivoid", + "level7": "Central Tivoid A", + "level8": "Tiv-Evand", + "level9": "Tiv-Iyive-Otanga" + }, + "ute": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Southern Numic" + }, + "utp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro", + "level6": "Utupua" + }, + "utr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma" + }, + "utu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic", + "level6": "Silopi-Utu" + }, + "uum": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Northwest Kipchak", + "level5": "West Kipchak", + "level6": "Crimean Tatar-Urum" + }, + "uur": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "Southern Vanuatu", + "level6": "Erromanga" + }, + "uuu": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Angkuic" + }, + "uve": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "Vanuatu-Loyalty Outliers" + }, + "uvh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Erap" + }, + "uvl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + 
"level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Mengenic" + }, + "uwa": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "uya": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Agoi-Doko-Iyoniyong" + }, + "uzn": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uzbek" + }, + "uzs": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Turkestan", + "level4": "Modern Turkestan", + "level5": "Uzbek" + }, + "vaa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "vae": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental", + "level3": "Nuclear SBB Occidental", + "level4": "Nduga-Luto" + }, + "vaf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Southern Tatic", + "level10": "Vafsic" + }, + "vag": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "East-West Grusi", + "level8": "Western Grusi", + "level9": 
"Sisaala-Chakali", + "level10": "Chakalic", + "level11": "Chakali-Tamprusi-Vagala" + }, + "vah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "vai": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Vai-Kono" + }, + "vaj": { + "level0": "Kxa", + "level1": "Ju-Kung" + }, + "val": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage" + }, + "vam": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic", + "level3": "Eastern Skouic", + "level4": "West Coast Skouic" + }, + "van": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai" + }, + "vao": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Northern Malakula", + "level8": "North Coast Malakula", + "level9": "Botovro-Vovo-Vao" + }, + "vap": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic", + "level6": "Gangte-Vaiphei" + }, + "var": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Tarahumara-Guarijio" + }, + "vas": { + "level0": 
"Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil", + "level8": "Vasave-Noiri" + }, + "vau": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Vanuma-Nyali" + }, + "vav": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Southern zone", + "level7": "Marathic", + "level8": "Marathi-Konkani", + "level9": "Old-Modern Marathi", + "level10": "Modern Marathi" + }, + "vay": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Northwestern Kiranti" + }, + "vbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "vec": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Italian" + }, + "ved": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": 
"Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Dhivehi-Sinhala", + "level6": "Sinhalaic" + }, + "vem": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "ven": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu" + }, + "veo": { + "level0": "Chumashan", + "level1": "Southern Chumashan", + "level2": "Central Chumashan" + }, + "vep": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "North Finnic", + "level5": "Ladogan", + "level6": "East Ladoga" + }, + "ver": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Northern Samba-Duru", + "level7": "Vere-Gimme", + "level8": "Vere" + }, + "vgr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Gujaratic", + "level10": "Western Gujaratic" + }, + "vgt": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Dutch-Belgian Sign", + "level4": "Belgian Sign" + }, + "vic": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": 
"Guinea Coast Creole English", + "level12": "Caribbean English Creole", + "level13": "Eastern Caribbean Creole", + "level14": "Barbados-Eustatius" + }, + "vid": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Vidunda-Sagala" + }, + "vie": { + "level0": "Austroasiatic", + "level1": "Vietic", + "level2": "Viet-Muong" + }, + "vif": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic" + }, + "vig": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur" + }, + "vin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "Western Lakes Bantu", + "level10": "Kivu", + "level11": "West Highlands Kivu" + }, + "vis": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": 
"Tamil-Malayalam", + "level9": "Malayalamoid" + }, + "vit": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Unclassified Narrow Grassfields" + }, + "viv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Bwaidoka-Iduna" + }, + "vka": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Ngarluma-Kariyarra" + }, + "vki": { + "level0": "Bookkeeping" + }, + "vkk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "South Sumatra Malay" + }, + "vkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "vkm": { + "level0": "Kamakanan", + "level1": "Nuclear Kamakanan" + }, + "vkn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Unclassified Western Benue-Congo Plateau" + }, + "vko": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western 
Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "vkp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Southwestern Shifted Romance", + "level11": "West Ibero-Romance", + "level12": "Galician Romance", + "level13": "Macro-Portuguese", + "level14": "Indo-Portuguesic", + "level15": "Northern Indo-Portuguesic" + }, + "vkt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "vku": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Yindjibarndi-Kurrama" + }, + "vky": { + "level0": "Bookkeeping" + }, + "vkz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Ebira-Nupoid", + "level4": "Macro-Nupoid", + "level5": "Nupoid" + }, + "vlp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "vlr": { + "level0": "Bookkeeping" + }, + "vls": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch" + }, + "vma": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central 
Ngayarda" + }, + "vmb": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Finasleigh Pama" + }, + "vmc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec" + }, + "vmd": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "South-Western Dravidian", + "level4": "Koraga" + }, + "vme": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Southwest Maluku", + "level4": "Babar", + "level5": "South Babar", + "level6": "Masela-South Babar" + }, + "vmf": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Greater East Franconian" + }, + "vmg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Patpatar-Minigir-Tolai", + "level9": "Minigir-Tolai" + }, + "vmh": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "vmi": { + "level0": "Worrorran", + "level1": "Northern Worrorran", + "level2": "Forrest River" + }, + "vmj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + 
"level5": "Mixtec", + "level6": "Coast Mixtec" + }, + "vmk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "vml": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "vmm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "vmp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic", + "level7": "Northern Baja Mazatec" + }, + "vmq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northeastern Alta Mixtec" + }, + "vmr": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Chuwaboic" + }, + "vms": { + "level0": "Unattested" + }, + "vmu": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yalandyic" + }, + "vmv": { + "level0": "Maiduan" + }, + "vmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "vmx": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec" + }, + "vmy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Valley Mazatec", + "level6": "Ayautlic" + }, + "vmz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Popolocan-Mazatecan", + "level4": "Mazatecan", + "level5": "Central Mazatec" + }, + "vnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Temotu", + "level5": "Utupua-Vanikoro" + }, + "vnm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Western Malakula linkage", + "level8": "Central-Western Malakula" + }, + "vnp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Cape Cumberland" + }, + "vol": { + "level0": "Artificial Language" + }, + "vor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena", + "level6": "Yungur-Voro" + }, + "vot": { + "level0": "Uralic", + "level1": "Finnic", + "level2": "Coastal Finnic", + "level3": "Neva", + "level4": "Central Finnic" + }, + "vra": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern 
Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage", + "level7": "Lemerig-Veraa" + }, + "vro": { + "level0": "Uralic", + "level1": "Finnic" + }, + "vrs": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "New Ireland-Northwest Solomonic linkage", + "level7": "St George linkage", + "level8": "Northwest Solomonic", + "level9": "Choiseul", + "level10": "West Choiseul" + }, + "vrt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Central Vanuatu", + "level6": "Malakula", + "level7": "Eastern Malakula linkage", + "level8": "Central-Southeast Malakula" + }, + "vsi": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "RSLic", + "level3": "Nuclear RSLic", + "level4": "Central RSLic" + }, + "vsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign" + }, + "vsv": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Spanish Sign", + "level3": "Nuclear Spanish Sign" + }, + "vto": { + "level0": "Tor-Orya", + "level1": "Tor", + "level2": "Coastal Tor", + "level3": "Betaf-Vitou" + }, + "vum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic 
Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo", + "level20": "Vilic", + "level21": "Lumbuic", + "level22": "Ngubi-Sangu-Sira-Punu", + "level23": "Sangu-Sira-Punu", + "level24": "Punu-Vungu" + }, + "vun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Kilimanjaro-Taita", + "level9": "Kilimanjaro Bantu", + "level10": "Chaga", + "level11": "Central Kilimanjaro" + }, + "vut": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic", + "level10": "Vute-Wawa" + }, + "vwa": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "waa": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Northern Sahaptin" + }, + "wab": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Bel-Roinji-Nenaya", + "level9": "Bel", + "level10": "Eastern Bel" + }, + "wac": { + "level0": "Chinookan" + }, + "wad": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "wae": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": 
"Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Alemannic", + "level10": "South Alemannic" + }, + "waf": { + "level0": "Unattested" + }, + "wag": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Eastern Taupota", + "level12": "Taupota-Waiema" + }, + "wah": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Seram Laut", + "level3": "Banda-Geser", + "level4": "Seran Laut" + }, + "wai": { + "level0": "Unattested", + "level1": "Tor-Orya (Unattested)" + }, + "waj": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Tairora" + }, + "wal": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "North-West Ometo", + "level3": "Central Ometo" + }, + "wam": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian" + }, + "wan": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Wan-Mwan" + }, + "wao": { + "level0": "Yuki-Wappo" + }, + "wap": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Pidjanan", + "level3": "Wapishanan", + "level4": "Wapishana-Atorai" + }, + "war": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater Central Philippine", + "level3": "Central 
Philippine", + "level4": "Bisayan", + "level5": "Central Bisayan", + "level6": "Warayan", + "level7": "Samar-Waray" + }, + "wat": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Are linkage" + }, + "wau": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak", + "level4": "Waura-Mehinaku-Kustenau", + "level5": "Waura-Mehinaku" + }, + "wav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme", + "level8": "Waka-Yandang" + }, + "waw": { + "level0": "Cariban", + "level1": "Parukotoan", + "level2": "Waiwaian" + }, + "wax": { + "level0": "Ramu", + "level1": "Lower Ramu", + "level2": "Ottilien", + "level3": "Watam-Kaian" + }, + "way": { + "level0": "Cariban", + "level1": "Guianan", + "level2": "Wayanaic" + }, + "waz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "wbb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Eastern Yapen" + }, + "wbe": { + "level0": "Lakes Plain", + "level1": "Tariku", + "level2": "East Tariku", + "level3": "Doutai-Kai-Waritai" + }, + "wbf": { + 
"level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Samu" + }, + "wbh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika", + "level10": "Fipaic", + "level11": "Maluwawaru" + }, + "wbi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Wanji-Sangu" + }, + "wbj": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic", + "level4": "West Rift South Cushitic", + "level5": "Northern West Rift South Cushitic" + }, + "wbk": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Nuristani", + "level4": "Nuristani Kalasha-Tregami" + }, + "wbl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi" + }, + "wbm": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "East Palaungic", + "level4": "Waic", + "level5": "Wa-Lawa", + "level6": "Nuclear Waic" + }, + "wbp": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Yapa" + }, + "wbq": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Teluguic" + }, + "wbr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": 
"Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Bhil" + }, + "wbt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Wati", + "level3": "Martuwangkic", + "level4": "Warnman-Wangka" + }, + "wbv": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Kartu-Nhanda", + "level3": "Kartu" + }, + "wbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Cenderawasih Bay", + "level5": "Yapen", + "level6": "Central-Western Yapen", + "level7": "Ansus-Ambai" + }, + "wca": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame", + "level3": "Yanomam-Yanimamo" + }, + "wci": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Western Gbe", + "level5": "Kpesi-Waci" + }, + "wdd": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie", + "level14": "Moyen Kasai-Ngounie", + "level15": "Interior Kasai-Ngounie", + "level16": "West Kasai-Ngounie", + "level17": "Northwest Kasai-Ngounie", + "level18": "Nzebi-Laali-Yaa", + "level19": "Njebi (B.50)", + "level20": "Ndjavi B" + }, + "wdg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Kalamic-South Adelbert", + "level3": "South Adelbert", + "level4": "Osum-Wadaginam-Pomoikan" + }, + "wdu": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "wea": { + "level0": "Sino-Tibetan", + "level1": 
"Karenic", + "level2": "Southern Karen", + "level3": "Sgaw" + }, + "wec": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee", + "level5": "Guere-Krahn", + "level6": "Guere" + }, + "wed": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Wedauic" + }, + "weh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic", + "level11": "Aghem-Weh" + }, + "wei": { + "level0": "Anim", + "level1": "Tirio", + "level2": "Nuclear Tirio" + }, + "wem": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "weo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Nunusaku", + "level3": "Three Rivers" + }, + "wep": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Alts\u00e4chsisch", + "level7": "Middle-Modern Low German", + "level8": "Low German", + "level9": "West Low German" + }, + "wer": { + "level0": "Kunimaipan", + "level1": "Weric" + }, + "wes": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", 
+ "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English", + "level10": "Macro-English", + "level11": "Guinea Coast Creole English", + "level12": "West African Creole English", + "level13": "Coastal Nigerian Krio", + "level14": "Nigeria-Cameroon Creole English" + }, + "wet": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Wetar-Atauro", + "level4": "Wetar", + "level5": "Perai-Tugun-Aputai", + "level6": "Perai-Aputai" + }, + "weu": { + "level0": "Bookkeeping" + }, + "wew": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Wewewa-Laboya" + }, + "wfg": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "wga": { + "level0": "Pama-Nyungan", + "level1": "Ngarna", + "level2": "Southern Ngarna", + "level3": "Ngarru" + }, + "wgb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "Suauic" + }, + "wgg": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Arabana-Wangganguru" + }, + "wgi": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "wgo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera" + }, + "wgu": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura" + }, + "wgw": { + "level0": "Bookkeeping" + }, + "wgy": { + "level0": "Pama-Nyungan" + }, + "wha": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Nunusaku", + "level3": "Patakai-Manusela", + "level4": "Manusela-Huaulu" + }, + "whg": { + "level0": "Nuclear Trans New Guinea", + "level1": "Chimbu-Wahgi", + "level2": "Wahgic" + }, + "whk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Lowland Kenyah" + }, + "whu": { + "level0": "Bookkeeping" + }, + "wib": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Tusia" + }, + "wic": { + "level0": "Caddoan", + "level1": "Northern Caddoan" + }, + "wie": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Ep-Meanha-Keyenganh" + }, + "wif": { + "level0": "Unattested", + "level1": "Pama-Nyungan (Unattested)" + }, + "wig": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Ngatharra-Ngathana-Iinychanya" + }, + "wih": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Ep-Meanha-Keyenganh" + }, + "wii": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic" + }, + "wij": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic", + "level7": "Mungkan-Mungkanhu" + }, + "wik": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Ngatharra-Ngathana-Iinychanya" + }, + "wil": { + "level0": "Worrorran", + "level1": "Northern Worrorran" + }, + "wim": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": 
"Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Mungkanic", + "level7": "Mungkan-Mungkanhu" + }, + "win": { + "level0": "Siouan", + "level1": "Mississippi Valley", + "level2": "Winnebago-Chiwere" + }, + "wir": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Tupi-Guarani Subgroup VI", + "level6": "Kawahiva", + "level7": "Unclassified Kawahiva" + }, + "wit": { + "level0": "Wintuan" + }, + "wiv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Meso Melanesian linkage", + "level6": "Bali-Vitu" + }, + "wiw": { + "level0": "Bookkeeping" + }, + "wiy": { + "level0": "Algic" + }, + "wja": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Tula-Longuda", + "level6": "Tula-Waja" + }, + "wji": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.2", + "level5": "Nuclear West Chadic B.2", + "level6": "Central West Chadic B.2", + "level7": "Warji-Gala-Kariya" + }, + "wka": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "South Cushitic", + "level3": "Greater West Rift South Cushitic" + }, + "wkb": { + "level0": "Bookkeeping" + }, + "wkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi", + "level8": "Sobeic" + }, + "wkl": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": 
"Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "wku": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Kalanadic" + }, + "wkw": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Western Waka-Kabic" + }, + "wla": { + "level0": "Walioic", + "level1": "Pai-Sinen-Walio" + }, + "wlc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu" + }, + "wle": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage", + "level6": "Silte-Wolane" + }, + "wlg": { + "level0": "Gunwinyguan" + }, + "wlh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Timor" + }, + "wli": { + "level0": "North Halmahera", + "level1": "Northern North Halmahera", + "level2": "Sahuan", + "level3": "Nuclear Sahuan", + "level4": "Sahu-Waioli" + }, + "wlk": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "California Athabaskan" + }, + "wll": { + "level0": "Nubian", + "level1": "Central Nubian", + "level2": "Kordofan Nubian", + "level3": "Western Kordofan Nubian" + }, + "wln": { + "level0": "Indo-European", + "level1": "Classical 
Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan", + "level4": "Latinic", + "level5": "Imperial Latin", + "level6": "Romance", + "level7": "Italo-Western Romance", + "level8": "Western Romance", + "level9": "Shifted Western Romance", + "level10": "Northwestern Shifted Romance", + "level11": "Gallo-Rhaetian", + "level12": "Oil" + }, + "wlo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio", + "level5": "Island Kaili-Wolio", + "level6": "Wolio-Kamaru" + }, + "wlr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo", + "level8": "Southwest Santo" + }, + "wls": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Tokalau Fijian", + "level6": "Polynesian", + "level7": "Nuclear Polynesian", + "level8": "East Uvean-Niuafo'ou" + }, + "wlu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Waka-Kabic", + "level4": "Western Waka-Kabic" + }, + "wlv": { + "level0": "Mataguayan", + "level1": "Mataguayo II", + "level2": "Wichi" + }, + "wlw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Central Dani", + "level3": "Grand Valley Dani", + "level4": "Walakic" + }, + "wlx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear 
Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Safaliba-Dagaare", + "level14": "Dagaaric", + "level15": "Central-South Dagaric", + "level16": "South Dagaric" + }, + "wly": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Southern Kiranti", + "level6": "Bantawic" + }, + "wma": { + "level0": "Unattested" + }, + "wmb": { + "level0": "Mirndi", + "level1": "Ngurlun" + }, + "wmc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Wamas-Samosa-Murupi-Mosimo" + }, + "wmd": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran" + }, + "wme": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Chaurasiya" + }, + "wmg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic", + "level4": "Muya" + }, + "wmh": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Timor-Babar", + "level3": "Eastern Timor", + "level4": "Kawaimina" + }, + "wmi": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Finasleigh Pama" + }, + "wmm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Greater South Sulawesi", + "level3": "South Sulawesi", + "level4": "Northern South Sulawesi", + "level5": "Masenrempulu" + }, + "wmn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Mid-Northern New Caledonian", + "level9": "Voh-Kone-Cem-Pac", + "level10": "Voh-Kone" + }, + "wmo": { + "level0": "Nuclear Torricelli" + }, + "wms": { + 
"level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Dumut", + "level6": "Ketum-Wambon" + }, + "wmt": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Ngumpin", + "level4": "Western Ngumpin" + }, + "wmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Sabaki-Swahili" + }, + "wmx": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Serra Hills", + "level3": "Rawo-Main Serra" + }, + "wnb": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": "Tibor" + }, + "wnc": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Wantoatic", + "level4": "Wantoat-Awara" + }, + "wnd": { + "level0": "Mangarrayi-Maran", + "level1": "Maran" + }, + "wne": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Pashto" + }, + "wng": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Ndeiram" + }, + "wni": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + 
"level11": "Comorian Bantu", + "level12": "Shindzwani-Shimaore" + }, + "wnk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese", + "level7": "Central Sumbanese" + }, + "wnm": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Palku", + "level3": "Pitta-Pitta" + }, + "wno": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani" + }, + "wnp": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei" + }, + "wnu": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Karian-Usan-Yaben" + }, + "wny": { + "level0": "Garrwan" + }, + "woa": { + "level0": "Northern Daly" + }, + "wob": { + "level0": "Kru", + "level1": "Greater Western Kru", + "level2": "Western Kru", + "level3": "Wee-Bassa-Klao", + "level4": "Wee" + }, + "woc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Kairiru-Manam", + "level8": "Manamic linkage", + "level9": "Kis-Wogeo" + }, + "wod": { + "level0": "Nuclear Trans New Guinea", + "level1": "Paniai Lakes", + "level2": "Mee-Wodani" + }, + "woe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Micronesian", + "level5": "Central Micronesian", + "level6": "Western Micronesian", + "level7": "Chuukic-Ponapeic", + "level8": "Trukic", + "level9": "Nuclear Trukic" + }, + "wof": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Wolofic" + }, + "wog": { + "level0": "Sepik", + "level1": 
"Iwam-Wogamus", + "level2": "Wogamusin-Chenapian" + }, + "woi": { + "level0": "Timor-Alor-Pantar", + "level1": "Alor-Pantar", + "level2": "Nuclear Alor-Pantar", + "level3": "Central Alor", + "level4": "Abuic" + }, + "wok": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru" + }, + "wol": { + "level0": "Atlantic-Congo", + "level1": "North-Central Atlantic", + "level2": "Wolof-BKK", + "level3": "Wolofic" + }, + "wom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Samba-Duru", + "level6": "Southern Samba-Duru", + "level7": "Sambaic", + "level8": "Samba-Leko-Perema-Mumbake", + "level9": "Perema-Mumbake" + }, + "won": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic", + "level11": "Bushoong-Wongo-Lele" + }, + "woo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Aru", + "level3": "Central Aru" + }, + "wor": { + "level0": "Geelvink Bay" + }, + "wos": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Ambulas-Hanga-Hundi" + }, + "wow": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "woy": { + "level0": "Unattested" + }, + "wpc": { + "level0": "Saliban", + "level1": "Maco-Piaroa" + }, + "wrb": { + "level0": "Pama-Nyungan", + "level1": 
"Ngarna", + "level2": "Southern Ngarna", + "level3": "Thawa" + }, + "wrd": { + "level0": "Bookkeeping" + }, + "wre": { + "level0": "Unattested", + "level1": "Atlantic-Congo (Unattested)" + }, + "wrg": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Northern Maric", + "level5": "Warungu-Gugu Badhun" + }, + "wrh": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "wri": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Mantharta", + "level4": "Wariyangga-Dhargari" + }, + "wrk": { + "level0": "Garrwan" + }, + "wrl": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic", + "level2": "Ngumpin-Yapa", + "level3": "Yapa" + }, + "wrm": { + "level0": "Pama-Nyungan", + "level1": "Desert Nyungic" + }, + "wrn": { + "level0": "Heibanic", + "level1": "Eastern Heibanic" + }, + "wro": { + "level0": "Worrorran", + "level1": "Western Worrorran" + }, + "wrp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea" + }, + "wrr": { + "level0": "Yangmanic" + }, + "wrs": { + "level0": "Border", + "level1": "Warisic", + "level2": "Nuclear Warisic" + }, + "wru": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Western Bungku-Tolaki", + "level8": "West Coast Bungku-Tolaki" + }, + "wrv": { + "level0": "Suki-Gogodala", + "level1": "Gogodalic", + "level2": "Ari-Waruna" + }, + "wrw": { + "level0": "Pama-Nyungan", + "level1": "Unclassified Pama-Nyungan" + }, + "wrx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Manggaraiic", 
+ "level6": "Waerana-Razong" + }, + "wry": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani" + }, + "wrz": { + "level0": "Gunwinyguan", + "level1": "Western Gunwinyguan", + "level2": "Warrayic" + }, + "wsa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Lower Mamberamo" + }, + "wsg": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian II", + "level3": "Gondi", + "level4": "Northwest Gondi", + "level5": "Southwest Gondi", + "level6": "Southern Gondi" + }, + "wsi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Espiritu Santo", + "level7": "West Santo" + }, + "wsk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Kowan" + }, + "wsr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Kainantu", + "level3": "Gauwa", + "level4": "Awa-Oweina" + }, + "wss": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Nyo", + "level4": "Potou-Tano", + "level5": "Tano", + "level6": "Central Tano", + "level7": "Akanic" + }, + "wsu": { + "level0": "Unattested" + }, + "wsv": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": 
"Kohistani" + }, + "wtf": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia" + }, + "wth": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Kulin", + "level5": "Nuclear Kulin" + }, + "wtk": { + "level0": "Sepik", + "level1": "Sepik Hill", + "level2": "Central Sepik Hill", + "level3": "Nuclear Central Sepik Hill", + "level4": "Kapriman-Watakataui" + }, + "wtm": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Midlands Indo-Aryan", + "level7": "Apabhramsic", + "level8": "Gujarati-Rajasthani", + "level9": "Rajasthani", + "level10": "Mewati-Gojri" + }, + "wtw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Kaili-Wolio", + "level4": "Southern Kaili-Wolio" + }, + "wua": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama" + }, + "wub": { + "level0": "Worrorran", + "level1": "Northern Worrorran" + }, + "wud": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Fongbeic" + }, + "wuh": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Northern Chinese", + "level5": "Mandarinic" + }, + "wul": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga" + }, + "wum": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ndasaic", + "level8": "Samayic", + "level9": "Ndasa-Wumbvu" + }, + 
"wun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Corridor Bantu", + "level8": "Mbozi", + "level9": "Mwika" + }, + "wur": { + "level0": "Marrku-Wurrugu" + }, + "wut": { + "level0": "Sko", + "level1": "Skou-Serra-Piore", + "level2": "Skouic", + "level3": "Eastern Skouic", + "level4": "West Coast Skouic" + }, + "wuu": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Wu-Hui Chinese" + }, + "wuv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Admiralty Islands", + "level5": "Western Admiralty Islands" + }, + "wux": { + "level0": "Limilngan-Wulna" + }, + "wuy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat", + "level6": "Raja Ampat Maya" + }, + "wwa": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Oti-Volta Oriental", + "level10": "Waama-Tayari-Ditammari" + }, + "wwb": { + "level0": "Unclassifiable" + }, + "wwo": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "North and Central Vanuatu", + "level5": "Northern Vanuatu", + "level6": "Torres-Banks linkage" + }, + "wwr": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Nyikinic" + }, + "www": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Vutic", + "level10": "Vute-Wawa" + }, + "wxa": { + "level0": "Sino-Tibetan", + "level1": "Sinitic" + }, + "wya": { + "level0": "Iroquoian", + "level1": "Northern Iroquoian" + }, + "wyb": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Wiradhuric" + }, + "wyi": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": "Kulin", + "level5": "Nuclear Kulin" + }, + "wym": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "wyr": { + "level0": "Tupian", + "level1": "Arikem-Tupari", + "level2": "Tuparic", + "level3": "Nuclear Tuparic", + "level4": "Wayoro-Tupari" + }, + "wyy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Central Pacific linkage", + "level5": "Western Fijian" + }, + "xaa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic", + "level7": "Moroccan-Andalusian Arabic" + }, + "xab": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Hasha-Sambe" + }, + "xac": { + "level0": "Sino-Tibetan", + "level1": "Brahmaputran", + "level2": "Bodo-Garo", + "level3": "Boroic", + "level4": "Tiwa-Boro", + "level5": "Bodo-Mech-Kachari" + }, + "xag": { + "level0": "Nakh-Daghestanian", 
+ "level1": "Daghestanian", + "level2": "Lezgic", + "level3": "Samur", + "level4": "Eastern Samur", + "level5": "Udi-Aghwan" + }, + "xah": { + "level0": "Bookkeeping" + }, + "xai": { + "level0": "Unclassifiable" + }, + "xal": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic", + "level3": "Khalkha-Buriat", + "level4": "Mongolian" + }, + "xam": { + "level0": "Tuu", + "level1": "!Ui" + }, + "xan": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "Agaw", + "level3": "Northern-Eastern-Western Agaw", + "level4": "Northeastern Agaw" + }, + "xao": { + "level0": "Bookkeeping" + }, + "xap": { + "level0": "Muskogean", + "level1": "Alabaman-Koasati" + }, + "xas": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Kamas-Selkup" + }, + "xat": { + "level0": "Katukinan" + }, + "xau": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "xav": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xaw": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Numic", + "level3": "Southern Numic" + }, + "xay": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic", + "level6": "Rejang-Makaham Kayan" + }, + "xba": { + "level0": "Bookkeeping" + }, + "xbc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB" + }, + "xbe": { + "level0": "Pama-Nyungan", + "level1": "East Queensland Border Pama Nyungan", + "level2": "Yugambalic", + "level3": "Yugambal-Bigambal" + }, + "xbg": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Kulin-Bunganditj", + "level4": 
"Warrnambool-Bunganditj" + }, + "xbi": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes", + "level3": "Kombioic" + }, + "xbo": { + "level0": "Turkic", + "level1": "Bolgar" + }, + "xbr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Sumba-Hawu", + "level5": "Sumba", + "level6": "Central-East Sumbanese" + }, + "xbw": { + "level0": "Unclassifiable" + }, + "xbx": { + "level0": "Bookkeeping" + }, + "xcc": { + "level0": "Unclassifiable" + }, + "xce": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic" + }, + "xcg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Cisalpine Celtic" + }, + "xch": { + "level0": "Chimakuan" + }, + "xcl": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Armenic" + }, + "xco": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic" + }, + "xcr": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Milyan-Carian" + }, + "xct": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan" + }, + "xcv": { + "level0": "Yukaghir", + "level1": "Kolymic" + }, + "xda": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Sydney-Hawkesbury" + }, + "xdc": { + "level0": "Indo-European", + "level1": "Unclassified Indo-European" + }, + "xdk": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South 
Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": "Kuri", + "level5": "Sydney-Hawkesbury" + }, + "xdo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Njila", + "level8": "Southern Njila", + "level9": "Kunene", + "level10": "Cimbebasia" + }, + "xdq": { + "level0": "Nakh-Daghestanian", + "level1": "Daghestanian", + "level2": "Dargwic", + "level3": "South Dargwa" + }, + "xdy": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic" + }, + "xeb": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "East Semitic" + }, + "xed": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Lamang-Hdi" + }, + "xeg": { + "level0": "Tuu", + "level1": "!Ui", + "level2": "Eastern !Ui" + }, + "xel": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "xem": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Land Dayak", + "level3": "Bidayuh-Southern Land Dayak", + "level4": "Southern Land Dayak" + }, + "xer": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xes": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Evapia", + "level4": "Nuclear Evapia", + "level5": "Kesawai-Wia" + }, + "xet": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup I" + }, + "xeu": { + "level0": "Eleman", + "level1": "Western Eleman" + }, + "xfa": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Latino-Faliscan" + }, + "xga": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic", + "level6": "Unclassified Continental Transalpine Celtic" + }, + "xgb": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Nwa-Ben", + "level4": "Unclassified Nwa-Ben" + }, + "xgd": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama", + "level3": "Gudang-Northeast Paman" + }, + "xgf": { + "level0": "Uto-Aztecan", + "level1": "Northern Uto-Aztecan", + "level2": "Californian Uto-Aztecan", + "level3": "Serran" + }, + "xgm": { + "level0": "Pama-Nyungan", + "level1": "Rockhampton-Gladstone" + }, + "xgu": { + "level0": "Worrorran", + "level1": "Western Worrorran" + }, + "xgw": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Guwa-Yanda" + }, + "xhd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xhe": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Continental Indo-Aryan", + "level6": "Indo-Aryan Northwestern zone", + "level7": "Sindhi-Lahnda", + "level8": "Sindhic" + }, + "xho": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland" + }, + "xhu": { + "level0": "Hurro-Urartian" + }, + "xhv": { + "level0": "Bookkeeping" + }, + "xii": { + "level0": "Khoe-Kwadi", + 
"level1": "Khoe", + "level2": "Khoekhoe", + "level3": "South Khoekhoe" + }, + "xil": { + "level0": "Unclassifiable" + }, + "xip": { + "level0": "Unattested", + "level1": "Pano-Tacanan (Unattested)" + }, + "xir": { + "level0": "Arawakan", + "level1": "Negro-Roraima", + "level2": "Bahuanaic" + }, + "xiv": { + "level0": "Unattested" + }, + "xiy": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Jurunic" + }, + "xjb": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Greater Bandjalangic", + "level4": "Bandjalangic", + "level5": "Coastal Bandjalang" + }, + "xka": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Nuclear Eastern Dardic", + "level7": "Shinaic", + "level8": "Western Shinaic", + "level9": "Dangari" + }, + "xkb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": "Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Western Ede", + "level8": "Southwestern Ede" + }, + "xkc": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic" + }, + "xkd": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "xke": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Central Sarawak", + "level4": "Punan-Muller-Schwaner", + "level5": "Muller-Schwaner", + "level6": "Hovongan-Kereho" + }, + "xkf": 
{ + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "xkg": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Central Mande", + "level4": "Manding-Jogo", + "level5": "Manding-Vai", + "level6": "Manding-Mokole", + "level7": "Manding", + "level8": "West Manding", + "level9": "Kita-Kagoro" + }, + "xkh": { + "level0": "Unattested", + "level1": "Cariban (Unattested)" + }, + "xki": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "xkj": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Khalkhalic" + }, + "xkk": { + "level0": "Austroasiatic", + "level1": "Bahnaric", + "level2": "North Bahnaric", + "level3": "Lamamic" + }, + "xkl": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah" + }, + "xkm": { + "level0": "Bookkeeping" + }, + "xkn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "Kayan-Murik-Modang", + "level4": "Kayan-Murik", + "level5": "Kayanic" + }, + "xko": { + "level0": "Bookkeeping" + }, + "xkp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian", + "level7": "Adharic", + "level8": "Tatic", + "level9": "Central Tatic", + "level10": "Taromic" + }, + "xkq": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": 
"Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "xkr": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Central Je" + }, + "xks": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Butonic", + "level9": "East Buton" + }, + "xkt": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Oti-Volta Occidental", + "level11": "Nuclear Oti-Volta Occidental", + "level12": "Northwest Oti-Volta", + "level13": "Mossi-Farefare", + "level14": "Mossic" + }, + "xku": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kamba-Kunyi" + }, + "xkv": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern 
Bantu", + "level10": "Sotho-Tswana (S.30)", + "level11": "Western Sotho-Tswana" + }, + "xkw": { + "level0": "Lepki-Murkim-Kembra" + }, + "xkx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz linkage", + "level8": "Southwest New Britain linkage", + "level9": "Arawe-Pasismanua", + "level10": "Pasismanua" + }, + "xky": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Kenyahic", + "level5": "Highland Kenyah", + "level6": "Upper Pujungan" + }, + "xkz": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Phobjib-Chali-Bumthangic", + "level4": "Chali-Bumthangic", + "level5": "Bumthangic" + }, + "xla": { + "level0": "Kamula-Elevala" + }, + "xlb": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Delawaran", + "level5": "Mahican-Woronoco-Pojassick" + }, + "xlc": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Lyco-Sidetic" + }, + "xld": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian" + }, + "xle": { + "level0": "Unclassifiable" + }, + "xlg": { + "level0": "Unclassifiable" + }, + "xlo": { + "level0": "Algic", + "level1": "Algonquian-Blackfoot", + "level2": "Algonquian", + "level3": "Eastern Algonquian", + "level4": "Maritimes-Southern New England Algonquian", + "level5": "Southern New England Algonquian" + }, + "xlp": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Cisalpine Celtic" + }, + "xls": { + 
"level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Unclassified Italic" + }, + "xlu": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Luvian" + }, + "xly": { + "level0": "Unclassifiable" + }, + "xmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Jarawan" + }, + "xmc": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "xmd": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Dabaic" + }, + "xmf": { + "level0": "Kartvelian", + "level1": "Georgian-Zan", + "level2": "Zan" + }, + "xmg": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke" + }, + "xmh": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "xmi": { + "level0": "Unattested" + }, + "xmj": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "North Biu-Mandara", + "level4": "Kotoko-Buduma", + "level5": "Kotoko Meridional" + }, + "xml": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic", + "level4": "Malaysian Sign" + }, + "xmm": { + "level0": "Austronesian", + "level1": 
"Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Vehicular Malay", + "level6": "Eastern Indonesia Trade Malay", + "level7": "Manadoic Malay" + }, + "xmo": { + "level0": "Unattested", + "level1": "Tupian (Unattested)" + }, + "xmp": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Compromise Middle Pama", + "level3": "Wik", + "level4": "Kuku-Wik-Ep", + "level5": "Kuku-Wik", + "level6": "Paman Kuku" + }, + "xmq": { + "level0": "Bookkeeping" + }, + "xms": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "ASLic" + }, + "xmt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Maya-Matbat" + }, + "xmu": { + "level0": "Eastern Daly" + }, + "xmv": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic" + }, + "xmw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Southeast Barito", + "level5": "Malagasic", + "level6": "North-Central Malagasic", + "level7": "Northern Malagasic", + "level8": "Tsimihety-Betsimisaraka" + }, + "xmx": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Raja Ampat-South Halmahera", + "level5": "Salawati-Batta" + }, + "xmy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Mayabic" + }, + "xmz": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": 
"Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Bungku-Tolaki", + "level7": "Eastern Bungku-Tolaki", + "level8": "East Coast Bungku-Tolaki" + }, + "xna": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian" + }, + "xnb": { + "level0": "Austronesian", + "level1": "Tsouic", + "level2": "Kanakanavu-Saaroa" + }, + "xng": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic" + }, + "xnh": { + "level0": "Bookkeeping" + }, + "xnj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni", + "level10": "Tanzania-Mozambique Ngoni" + }, + "xnm": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Unclassified Eastern Nyulnyulan" + }, + "xnn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Meso-Cordilleran", + "level4": "South-Central Cordilleran", + "level5": "Central Cordilleran", + "level6": "Nuclear Cordilleran", + "level7": "Bontok-Kankanay", + "level8": "Kankanay", + "level9": "Maeng-Northern Kankanay" + }, + "xnq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Manda-Ngoni", + "level10": "Tanzania-Mozambique Ngoni" + }, + "xnr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Indo-Aryan", + "level4": "Middle-Modern Indo-Aryan", + "level5": "Eastern Dardic", + "level6": "Himachali", + "level7": "Kangric-Chamealic-Bhattiyali", + 
"level8": "Kangri-Dogri" + }, + "xns": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West Himalayish", + "level3": "Western West Himalayish", + "level4": "Kinnauric" + }, + "xny": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda" + }, + "xod": { + "level0": "South Bird's Head Family", + "level1": "East South Bird's Head" + }, + "xog": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza", + "level10": "North Nyanza", + "level11": "Soga-Kenyi" + }, + "xoi": { + "level0": "Ramu", + "level1": "Goam", + "level2": "Tamolan", + "level3": "Unclassified Tamolan" + }, + "xok": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng" + }, + "xom": { + "level0": "Koman", + "level1": "Central Koman", + "level2": "Komo-Uduk" + }, + "xon": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Northern Central Gur", + "level6": "Bwamu-Oti-Volta", + "level7": "Oti-Volta", + "level8": "Nuclear Oti-Volta", + "level9": "Gurma-Yom-Oti-Volta Occidental", + "level10": "Gurma-Yom-Naudem", + "level11": "Gurma", + "level12": "Gurma B", + "level13": "Konkomba-Gangam" + }, + "xop": { + "level0": "Lower Sepik", + "level1": "Nor" + }, + "xor": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "xow": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Kumil-Tibor", + "level6": 
"Tibor", + "level7": "Nuclear Tibor" + }, + "xpa": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Pirriya-Kungkari" + }, + "xpb": { + "level0": "North-Eastern Tasmanian" + }, + "xpc": { + "level0": "Turkic", + "level1": "Common Turkic", + "level2": "Kipchak-Turkestan", + "level3": "Kipchak", + "level4": "Unclassified Kipchak" + }, + "xpe": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Kpelle" + }, + "xpf": { + "level0": "South-Eastern Tasmanian" + }, + "xpg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian" + }, + "xph": { + "level0": "North-Eastern Tasmanian" + }, + "xpi": { + "level0": "Unclassifiable" + }, + "xpk": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mayoruna Branch", + "level3": "Mayo Group", + "level4": "Matses subgroup" + }, + "xpl": { + "level0": "Western Tasmanian" + }, + "xpm": { + "level0": "Yeniseian" + }, + "xpn": { + "level0": "Unclassifiable" + }, + "xpo": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Corachol-Aztecan", + "level3": "Aztec" + }, + "xpr": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Central Iranian PB", + "level6": "Northwestern Iranian" + }, + "xps": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Unclassified Luvic" + }, + "xpu": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Northwest Semitic", + "level5": "Canaanite", + "level6": "Ugarito-Phoenician", + "level7": "Phoenician-Punic" + }, + "xpw": { + "level0": "Western Tasmanian", + "level1": "Western Coastal Tasmanian" + }, + "xpx": { + "level0": "Western Tasmanian", + "level1": 
"Western Coastal Tasmanian" + }, + "xpz": { + "level0": "South-Eastern Tasmanian" + }, + "xqt": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xrb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Senufo", + "level4": "Karaboro" + }, + "xre": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Cerrado", + "level3": "Goyaz", + "level4": "Northern Je", + "level5": "Eastern Timbira" + }, + "xrn": { + "level0": "Yeniseian" + }, + "xrr": { + "level0": "Unclassifiable" + }, + "xrt": { + "level0": "Unclassifiable" + }, + "xru": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "xrw": { + "level0": "Sepik", + "level1": "Ram", + "level2": "Pouye-Karawa" + }, + "xsa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Sayhadic" + }, + "xsb": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Central Luzon", + "level3": "Sambalic", + "level4": "Tina-Bolinao" + }, + "xsd": { + "level0": "Indo-European", + "level1": "Anatolian", + "level2": "Luvo-Lydian", + "level3": "Luvo-Palaic", + "level4": "Luvic", + "level5": "Lyco-Carian", + "level6": "Lyco-Sidetic" + }, + "xse": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Asmat-Kamoro" + }, + "xsh": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic" + }, + "xsi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Ngero-Vitiaz linkage", + "level7": "Vitiaz 
linkage" + }, + "xsk": { + "level0": "Bookkeeping" + }, + "xsl": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Northwestern Canada Athabaskan", + "level4": "Slaveyic", + "level5": "Slave" + }, + "xsm": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Gur", + "level4": "Central Gur", + "level5": "Southern Central Gur", + "level6": "Grusi", + "level7": "Northern Grusi", + "level8": "Nuna-Kasem" + }, + "xsn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Kainji", + "level4": "Central Kainji", + "level5": "Basa-Eastern Kainji", + "level6": "Eastern Kainji", + "level7": "Jos", + "level8": "Northern Jos", + "level9": "North-Central Jos", + "level10": "Chokobo-Lemoro-Sanga", + "level11": "Lemoro-Sanga" + }, + "xso": { + "level0": "Unclassifiable" + }, + "xsp": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Silopic", + "level6": "Silopi-Utu" + }, + "xsq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "North Mozambique Bantu", + "level8": "Makua-Lomwe" + }, + "xsr": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Middle Old Tibetan", + "level5": "Late Old Tibetan", + "level6": "Central Tibetan", + "level7": "South-Western Tibetic", + "level8": "Sherpa-Jirel", + "level9": "Sherpaic" + }, + "xss": { + "level0": "Bookkeeping" + }, + "xst": { + "level0": "Bookkeeping" + }, + "xsu": { + "level0": "Yanomamic" + }, + "xsy": { + "level0": "Austronesian", + "level1": "Northwest Formosan" + }, + "xta": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": 
"Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec" + }, + "xtb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Baja Mixtec" + }, + "xtc": { + "level0": "Kadugli-Krongo", + "level1": "Central-Western Kadugli-Krongo", + "level2": "Katcha-Kadugli-Miri-Kanga" + }, + "xtd": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec" + }, + "xte": { + "level0": "Nuclear Trans New Guinea", + "level1": "Mek", + "level2": "Eastern Mek" + }, + "xtg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Celtic", + "level3": "Nuclear Celtic", + "level4": "Core Celtic", + "level5": "Continental Transalpine Celtic" + }, + "xti": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Sinicahua-Tijaltepec" + }, + "xtj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic", + "level8": "Yucuane-Teita" + }, + "xtl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Southwestern Alta Mixtec", + "level8": "Chalcatongic", + "level9": "Sinicahua-Tijaltepec" + }, + "xtm": { + "level0": 
"Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic" + }, + "xtn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec" + }, + "xto": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Tokharian" + }, + "xtp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec", + "level9": "Sindihuic" + }, + "xtq": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Saka-Wakhi", + "level5": "Saka" + }, + "xts": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Eastern Alta Mixtec", + "level7": "Southeastern Alta Mixtec", + "level8": "Teozacoalco Mixtec", + "level9": "Sindihuic" + }, + "xtt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Western Alta Mixtec", + "level7": "Tlaxiacic" + }, + "xtu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Northern Alta Mixtec" + }, + "xtv": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Yuin-Kuri", + "level4": 
"Yuin" + }, + "xtw": { + "level0": "Nambiquaran", + "level1": "Nambikwara Complex", + "level2": "Northern Nambiquaran", + "level3": "Roosevelt" + }, + "xty": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Amuzgo-Mixtecan", + "level3": "Mixtecan", + "level4": "Mixtec-Cuicatec", + "level5": "Mixtec", + "level6": "Guerrero Mixtec", + "level7": "Nuclear Guerrero Mixtec", + "level8": "Southwestern Guerrero Mixtec" + }, + "xtz": { + "level0": "Bookkeeping" + }, + "xua": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xub": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xuf": { + "level0": "Bookkeeping" + }, + "xug": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Okinawa" + }, + "xuj": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Badaga-Kannada", + "level5": "Kannadoid" + }, + "xum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic", + "level3": "Sabellic" + }, + "xuo": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Mbumic", + "level5": "Central Mbum" + }, + "xup": { + "level0": "Athabaskan-Eyak-Tlingit", + "level1": "Athabaskan-Eyak", + "level2": "Athabaskan", + "level3": "Pacific Coast Athabaskan", + "level4": "Oregon Athabaskan" + }, + "xur": { + "level0": "Hurro-Urartian" + }, + "xut": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Norman Pama", + "level3": "Kuthant-Gurdjar" + }, + "xuu": { + "level0": "Khoe-Kwadi", + "level1": "Khoe", + "level2": "Non-Khoekhoe", + "level3": "West-Kxoe", + 
"level4": "Kxoe-Ani" + }, + "xve": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Italic" + }, + "xwc": { + "level0": "Siouan", + "level1": "Catawban" + }, + "xwe": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "xwg": { + "level0": "Surmic", + "level1": "South Surmic", + "level2": "Southeast Surmic" + }, + "xwl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Gbe", + "level4": "Eastern Gbe", + "level5": "Western Phla-Phera" + }, + "xwr": { + "level0": "Greater Kwerba", + "level1": "Kwerba-Samarokena", + "level2": "Kwerbaic" + }, + "xxb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Kwa Volta-Congo", + "level3": "Na-Togo" + }, + "xxk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Bima-Lembata", + "level3": "Flores-Sumba-Hawu", + "level4": "Flores Barat", + "level5": "Central Flores-Paluqe", + "level6": "Central Flores", + "level7": "Eastern Central Flores", + "level8": "Nage-Keo" + }, + "xxr": { + "level0": "Nuclear-Macro-Je", + "level1": "Maxakali-Borum", + "level2": "Maxakalian", + "level3": "Nuclear Maxakalian", + "level4": "Unclassified Nuclear Maxakalian" + }, + "xya": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "North Coast Pama-Nyungan", + "level3": "Gumbaynggiric" + }, + "xyb": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Bidyaric" + }, + "xyl": { + "level0": "Unattested", + "level1": "Nambiquaran (Unattested)" + }, + "xyy": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "Victorian Pama-Nyungan", + "level3": "Eastern Victoria" + }, + "xzh": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "West 
Himalayish", + "level3": "Eastern West Himalayish", + "level4": "Pithauragarh", + "level5": "Darma-Byangsi-Chaudangsi", + "level6": "Darma-Byangsi", + "level7": "Zhangzhungic" + }, + "yaa": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "yab": { + "level0": "Naduhup", + "level1": "Eastern Naduhup", + "level2": "Hup-Yuhup" + }, + "yac": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "yad": { + "level0": "Peba-Yagua" + }, + "yaf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Yaka-Suku" + }, + "yah": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Shughni-Yazgulami" + }, + "yai": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Iranian PBS", + "level5": "Sogdic-Ossetic", + "level6": "Sogdic", + "level7": "Sogdian-Yagnobi" + }, + "yaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic", + "level7": "Central Bandaic" + }, + "yak": { + "level0": "Sahaptian", + "level1": "Sahaptin", + "level2": "Northern Sahaptin" + }, + "yal": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + 
"level3": "Central Mande", + "level4": "Susu-Yalunka" + }, + "yam": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Nkambe" + }, + "yan": { + "level0": "Misumalpan", + "level1": "Sumalpan", + "level2": "Sumuic" + }, + "yao": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Yaoic" + }, + "yap": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Yapesic" + }, + "yaq": { + "level0": "Uto-Aztecan", + "level1": "Southern Uto-Aztecan", + "level2": "Cahitan" + }, + "yar": { + "level0": "Cariban", + "level1": "Venezuelan Cariban", + "level2": "Mapoyo-Tamanaku", + "level3": "Mapoyo-Yawarana" + }, + "yas": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)", + "level12": "Mmala-Elip-Gunu", + "level13": "Elip-Gunu" + }, + "yat": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Sanaga-West Mbam (A.40)", + "level10": "West Mbam (A.40)" + }, + "yav": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow 
Bantu", + "level6": "Mbam-Bubi", + "level7": "Mbam", + "level8": "Nuclear Mbam", + "level9": "Bati-Mbure-Yambassa", + "level10": "Mbure-Yambassa", + "level11": "Yambassa (A.60)" + }, + "yaw": { + "level0": "Arawakan", + "level1": "Central-Eastern Maipuran", + "level2": "Central Maipuran", + "level3": "Xinguan Arawak" + }, + "yax": { + "level0": "Bookkeeping" + }, + "yay": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "North-South Central Delta Cross", + "level7": "Ubaghara-Kohumono", + "level8": "Kohumonoic" + }, + "yaz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Delta Cross", + "level4": "Upper Cross", + "level5": "Central Upper Cross", + "level6": "East-West Central Delta Cross", + "level7": "Lokoic" + }, + "yba": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Idomoid", + "level4": "Akweya", + "level5": "Etulo-Idoma", + "level6": "Nuclear Idoma" + }, + "ybb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Mbam-Nkam", + "level8": "Southern Mbam-Nkam", + "level9": "Bamileke", + "level10": "West Bamileke", + "level11": "Bamboutos" + }, + "ybd": { + "level0": "Bookkeeping" + }, + "ybe": { + "level0": "Turkic", + "level1": "Common Turkic" + }, + "ybh": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Tamar", + "level6": "Yakkha-Athpariyic" + }, + "ybi": { + "level0": "Sino-Tibetan", + "level1": "Himalayish", + "level2": "Mahakiranti", + "level3": "Kiranti", + "level4": "Eastern Kiranti", + "level5": "Upper Arun", + "level6": "Lohorung-Yamphu", + 
"level7": "Yamphuic" + }, + "ybj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Alumic", + "level5": "Hasha-Sambe" + }, + "ybk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Bokha-Phuma" + }, + "ybl": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Yukubenic" + }, + "ybm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Karian-Usan-Yaben" + }, + "ybn": { + "level0": "Arawakan", + "level1": "Medio Rio Negro", + "level2": "Marauia-Castana" + }, + "ybo": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Greater Yaganon", + "level4": "Yaganon" + }, + "ybx": { + "level0": "Walioic" + }, + "yby": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria" + }, + "ych": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu", + "level11": "Unclassified Nasu-Gepu" + }, + "ycl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": 
"Lipo-Lolopo" + }, + "ycn": { + "level0": "Arawakan", + "level1": "Japura-Colombia", + "level2": "Nuclear Japura-Colombia", + "level3": "Caqueta" + }, + "ycp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "ycr": { + "level0": "Japonic", + "level1": "Japanesic", + "level2": "Japan-Taiwan Japanese" + }, + "yda": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Guwa-Yanda" + }, + "ydd": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Middle German", + "level7": "East Middle German", + "level8": "Schlesisch-Wilmesau" + }, + "yde": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + "level5": "Yangum" + }, + "ydg": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Central Eastern Iranian", + "level5": "Yidgha-Munji" + }, + "ydk": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Mabuso", + "level4": "Hanseman", + "level5": "Rempic" + }, + "yds": { + "level0": "Bookkeeping" + }, + "yea": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Malayalamoid", + "level10": "Ravulic" + }, + "yec": { + "level0": "Mixed Language", + "level1": "German-Yiddish-Romani-Rotwelsch" + }, + "yee": { + "level0": "Lower Sepik", + "level1": "Karawarian" + }, + 
"yei": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Northern Bantoid", + "level5": "Mambiloid", + "level6": "Nizaa-Mambila-Vute", + "level7": "Konja-Mambila-Vute", + "level8": "Mambila-Vute", + "level9": "Mambila-Mbongno", + "level10": "Mambila", + "level11": "Njerup" + }, + "yej": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Graeco-Phrygian", + "level3": "Greek", + "level4": "South Greek", + "level5": "Central Greek", + "level6": "Koineic Greek", + "level7": "Modern Koineic Greek" + }, + "yel": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Cuvette", + "level10": "Nkutsuic" + }, + "yen": { + "level0": "Bookkeeping" + }, + "yer": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "Tarokoid", + "level5": "Yangkam-Tarok-Pe", + "level6": "Tarok-Pe" + }, + "yes": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Koroic" + }, + "yeu": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Yerukula-Korava-Kaikadi" + }, + "yev": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "West Palai", + "level3": "Agi-Yeri" + }, + "yey": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": 
"Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu" + }, + "ygl": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + "level5": "Yangum" + }, + "ygm": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Warup", + "level4": "Nuclear Warup", + "level5": "Degenanic" + }, + "ygp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "ygr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Siane-Yagaria", + "level5": "Kamano-Yagaria" + }, + "ygs": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "ygu": { + "level0": "Unattested", + "level1": "Mangarrayi-Maran (Unattested)" + }, + "ygw": { + "level0": "Angan", + "level1": "Nuclear Angan" + }, + "yha": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Southwestern Kra", + "level3": "Southern Kra" + }, + "yhd": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "Eastern Arabic", + "level7": "Qeltu" + }, + "yhl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha" + }, + "yia": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan" + }, + "yib": { + "level0": "Bookkeeping" + }, 
+ "yif": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Unclassified Nisoid" + }, + "yig": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nesu" + }, + "yih": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "High German", + "level6": "Upper German", + "level7": "Middle-Modern High German", + "level8": "Modern High German", + "level9": "Upper Franconian", + "level10": "Greater East Franconian" + }, + "yii": { + "level0": "Pama-Nyungan", + "level1": "Yimidhirr-Yalanji-Yidinic", + "level2": "Yidinic" + }, + "yij": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Yindjibarndi-Kurrama" + }, + "yik": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo" + }, + "yil": { + "level0": "Pama-Nyungan", + "level1": "Ngarna", + "level2": "Southern Ngarna", + "level3": "Ngarru" + }, + "yim": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Angami-Ao", + "level3": "Central Naga", + "level4": "Yimchingric" + }, + "yin": { + "level0": "Austroasiatic", + "level1": "Khasi-Palaung", + "level2": "Palaungic", + "level3": "West Palaungic", + "level4": "Riang" + }, + "yio": 
{ + "level0": "Bookkeeping" + }, + "yip": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish" + }, + "yiq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Lipo-Micha" + }, + "yir": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Greater Awyu", + "level4": "Awyu-Dumut", + "level5": "Awyu" + }, + "yis": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Yau-Yis" + }, + "yit": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Unclassified Core Lalu" + }, + "yiu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid" + }, + "yiv": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nisu-Nyisu", + "level8": "Nisu", + "level9": "Nuclear Nisu", + "level10": "Northern Nisu" + }, + "yix": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe", + "level8": "Sani-Axi" + }, + "yiy": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Southwest 
Pama", + "level3": "Coastal Southwest Paman" + }, + "yiz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe", + "level8": "Sani-Axi" + }, + "yka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Basap-Greater Barito", + "level3": "Greater Barito linkage", + "level4": "Sama-Bajaw" + }, + "ykg": { + "level0": "Yukaghir" + }, + "ykh": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Eastern Mongolic" + }, + "yki": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "South Halmahera-West New Guinea", + "level4": "Lower Mamberamo", + "level5": "Yoke-Pauwi" + }, + "ykk": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Are-Taupota linkage", + "level9": "Taupota linkage", + "level10": "Nuclear Taupota linkage", + "level11": "Wedauic" + }, + "ykl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha", + "level9": "Khlula-Zokhuo" + }, + "ykm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Schouten linkage", + "level7": "Siau", + "level8": "Sissano-Tumleo", + "level9": "Ali-Tumleo" + }, + "ykn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": 
"Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga", + "level10": "Kuansi-Kuamasi" + }, + "yko": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Sawabantu", + "level8": "Bengaic", + "level9": "Yasa-Kombe" + }, + "ykr": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean" + }, + "ykt": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Mondzish" + }, + "yku": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga", + "level10": "Kuansi-Kuamasi" + }, + "yky": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Ngbandi-Mongoba-Kazibati", + "level6": "Ngbandic", + "level7": "Nuclear Ngbandic" + }, + "yla": { + "level0": "Keram", + "level1": "Ulmapo" + }, + "ylg": { + "level0": "Ndu", + "level1": "Nuclear Ndu", + "level2": "Manambu-Yalaku" + }, + "yli": { + "level0": "Nuclear Trans New Guinea", + "level1": "Dani", + "level2": "Ngalik-Nduga", + "level3": "Yalic" + }, + "yll": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Ningil-Yil" + }, + "ylm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": 
"Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo" + }, + "yln": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "ylo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Unclassified Lisoid" + }, + "ylr": { + "level0": "Pama-Nyungan", + "level1": "Kalkatungic" + }, + "ylu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Lower Markham", + "level9": "Busu" + }, + "yly": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Southern Melanesian", + "level5": "New Caledonian", + "level6": "Mainland New Caledonian", + "level7": "Northern New Caledonian", + "level8": "Extreme Northern New Caledonian", + "level9": "Nyalayu" + }, + "yma": { + "level0": "Bookkeeping" + }, + "ymb": { + "level0": "Nuclear Torricelli", + "level1": "Kombio-Arapesh-Urat", + "level2": "Kombio-Yambes" + }, + "ymc": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Northern-Southern Muji" + }, + "ymd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Hani-Jino", + "level5": "Bisoid-Hanic", + "level6": "Hanic", + "level7": "Ha-Ya", + "level8": "Akhaic" + }, + "yme": { + "level0": "Peba-Yagua", + "level1": 
"Peba-Yameo" + }, + "ymg": { + "level0": "Bookkeeping" + }, + "ymh": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo", + "level8": "Southwestern Lolo" + }, + "ymi": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji" + }, + "ymj": { + "level0": "Bookkeeping" + }, + "ymk": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Rufiji-Ruvuma", + "level8": "Ruvuma", + "level9": "Makonde-Makwe" + }, + "yml": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Nuclear Papuan Tip linkage", + "level7": "North Papuan Mainland-D'Entrecasteaux linkage", + "level8": "Bwaidoga linkage", + "level9": "Iamalelic" + }, + "ymm": { + "level0": "Afro-Asiatic", + "level1": "Cushitic", + "level2": "East Cushitic", + "level3": "Lowland East Cushitic", + "level4": "Southern Lowland East Cushitic", + "level5": "Mainstream Lowland East Cushitic", + "level6": "Omo-Tana", + "level7": "Eastern Omo-Tana" + }, + "ymn": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Sarmi-Jayapura Bay", + "level7": "Sarmi" + }, + "ymo": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Nuclear Palai", + "level4": "Yangum-Ambrak", + 
"level5": "Yangum" + }, + "ymp": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Misim-Yamap" + }, + "ymq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji" + }, + "ymr": { + "level0": "Dravidian", + "level1": "South Dravidian", + "level2": "South Dravidian I", + "level3": "Tamil-Kannada", + "level4": "Tamil-Kota", + "level5": "Tamil-Toda", + "level6": "Tamil-Irula", + "level7": "Tamil-Kodagu", + "level8": "Tamil-Malayalam", + "level9": "Tamiloid", + "level10": "Malasa-Eravallan" + }, + "ymx": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Northern-Southern Muji" + }, + "ymz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji" + }, + "yna": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": 
"Unclassified Nuclear Nisoid" + }, + "ynd": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Yandruwandhic" + }, + "yne": { + "level0": "Bookkeeping" + }, + "yng": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Mundu-Baka", + "level6": "Western Mundu-Baka", + "level7": "River Western Mundu-Baka", + "level8": "Monzomboic", + "level9": "Kpala-Bakpa" + }, + "ynh": { + "level0": "Bookkeeping" + }, + "ynk": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo", + "level2": "Yupik" + }, + "ynl": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Rai Coast", + "level3": "Nuru" + }, + "yno": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Tai P", + "level9": "Shanic", + "level10": "Sukaphic", + "level11": "Northern Shanic", + "level12": "Sipsongpannic" + }, + "ynq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Waka-Yendang-Teme", + "level8": "Waka-Yandang" + }, + "yns": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie" + }, + "ynu": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "South Eastern Tucanoan" + }, + "yob": { + "level0": 
"Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "yog": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Northern Luzon", + "level3": "Cagayan Valley", + "level4": "Ibanagic", + "level5": "Gaddangic" + }, + "yoi": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Southern Ryukyu", + "level3": "Macro-Yaeyama" + }, + "yok": { + "level0": "Yokutsan", + "level1": "General Yokuts", + "level2": "Nim Yokuts" + }, + "yol": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "North Sea Germanic", + "level6": "Anglo-Frisian", + "level7": "Anglic", + "level8": "Later Anglic", + "level9": "Middle-Modern English" + }, + "yom": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended", + "level13": "Kikongo Language Cluster", + "level14": "Nuclear KLC", + "level15": "Kikongoic", + "level16": "Kambakunyic Kikongo", + "level17": "Kilaadic Kikongo", + "level18": "Central-Southern Kikongo", + "level19": "West Kikongo" + }, + "yon": { + "level0": "Nuclear Trans New Guinea", + "level1": "Asmat-Awyu-Ok", + "level2": "Awyu-Ok", + "level3": "Ok-Oksapmin", + "level4": "Ok", + "level5": "Lowland Ok", + "level6": "Division A Lowland Ok" + }, + "yor": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Defoid", + "level4": 
"Yoruboid", + "level5": "Edekiri", + "level6": "Ede", + "level7": "Eastern Ede", + "level8": "Southeastern Ede", + "level9": "Nuclear Yoruba", + "level10": "Lucumi-Yoruba" + }, + "yos": { + "level0": "Bookkeeping" + }, + "yot": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Central Adamawa", + "level5": "Mumuye-Yandang", + "level6": "Yandangic", + "level7": "Bali-Kpasam" + }, + "yox": { + "level0": "Japonic", + "level1": "Ryukyuan", + "level2": "Northern Ryukyuan", + "level3": "Amami" + }, + "yoy": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai", + "level6": "Sapa-Southwestern Tai", + "level7": "Southwestern Tai", + "level8": "Southwestern Thai PH", + "level9": "Lao-Thai", + "level10": "Sakon Nakhon" + }, + "ypa": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula" + }, + "ypb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Ani-Labo" + }, + "ypg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula", + "level8": "Pholic" + }, + "yph": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": 
"Phupha-Alugu" + }, + "ypl": { + "level0": "Bookkeeping" + }, + "ypm": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji", + "level10": "Core Muji", + "level11": "Nuclear Core Muji", + "level12": "Bokha-Phuma" + }, + "ypn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Ani-Labo" + }, + "ypo": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Upriver Riverine Phula", + "level8": "Pholic" + }, + "ypp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupa-Phuza" + }, + "ypw": { + "level0": "Bookkeeping" + }, + "ypz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Riverine Phula", + "level7": "Downriver Riverine Phula", + "level8": "Phupa-Phuza" + }, + "yrb": { + "level0": "Yareban", + "level1": "Yareba-Bariji-Nawaru" + }, + "yre": { + "level0": "Mande", + "level1": "Eastern Mande", + "level2": "Southeastern Mande", + "level3": "Mano-Dan", + "level4": "Guro-Dan", + "level5": "Guro-Yaoure" + }, + "yri": { + "level0": "Bookkeeping" + }, + "yrk": { + "level0": "Uralic", + "level1": "Samoyedic", + "level2": "Enets-Nenets", + "level3": "Nenets" + 
}, + "yrl": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup III" + }, + "yrn": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang" + }, + "yro": { + "level0": "Yanomamic", + "level1": "Ninam-Yanomam-Yaroame", + "level2": "Yanomam-Yaroame" + }, + "yrs": { + "level0": "Bookkeeping" + }, + "yrw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Madang", + "level2": "Croisilles", + "level3": "Greater Northern Adelbert", + "level4": "Northern Adelbert", + "level5": "Numugenan", + "level6": "Yarawata-Parawen-Ukuriguma" + }, + "ysd": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Kazhouish" + }, + "ysg": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Kuansi-Kuamasi-Sonaga" + }, + "ysl": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "LSFic", + "level3": "Yugoslav Sign" + }, + "ysm": { + "level0": "Sign Language", + "level1": "L1 Sign Language", + "level2": "Myanmar Sign" + }, + "ysn": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Axioid", + "level7": "Sani-Axi-Azhe" + }, + "yso": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Unclassified Southeastern Ngwi" + }, + "ysp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + 
"level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Lipo-Lolopo", + "level7": "Unclassified Lipo-Lolopo", + "level8": "Southwestern Lolo" + }, + "ysr": { + "level0": "Eskimo-Aleut", + "level1": "Eskimo" + }, + "yss": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi" + }, + "ysy": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "yta": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu" + }, + "ytl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid" + }, + "ytp": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Muji", + "level8": "Laghuu-Core Muji", + "level9": "Thopho-Core Muji" + }, + "ytw": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Unclassified Yupna" + }, + "yua": { + "level0": "Mayan", + "level1": "Core Mayan", + "level2": "Yucatecan", + "level3": "Nuclear Yucatecan", + "level4": "Yucatec-Lacandon" + }, + "yub": { + "level0": "Pama-Nyungan", + "level1": "East Queensland Border Pama Nyungan", + "level2": "Yugambalic", + "level3": "Yugambal-Bigambal" + }, + "yud": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + 
"level2": "West Semitic", + "level3": "Central Semitic", + "level4": "Arabian", + "level5": "Arabic", + "level6": "North African Arabic" + }, + "yue": { + "level0": "Sino-Tibetan", + "level1": "Sinitic", + "level2": "Classical-Middle-Modern Sinitic", + "level3": "Middle-Modern Sinitic", + "level4": "Yue-Pinghua" + }, + "yuf": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "Pai" + }, + "yug": { + "level0": "Yeniseian", + "level1": "Northern Yeniseian" + }, + "yui": { + "level0": "Tucanoan", + "level1": "Eastern Tucanoan", + "level2": "Eastern Eastern Tucanoan", + "level3": "Eastern Eastern Tucanoan II", + "level4": "Pisamira-Yuruti", + "level5": "Tuyuca-Yuruti" + }, + "yuj": { + "level0": "Pauwasi", + "level1": "Eastern Pauwasi" + }, + "yuk": { + "level0": "Yuki-Wappo" + }, + "yul": { + "level0": "Central Sudanic", + "level1": "Sara-Bongo-Bagirmi", + "level2": "SBB Occidental" + }, + "yum": { + "level0": "Cochimi-Yuman", + "level1": "Yuman", + "level2": "General Yuman", + "level3": "River Yuman" + }, + "yun": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Bena-Mboi", + "level5": "Bena", + "level6": "Yungur-Voro" + }, + "yup": { + "level0": "Cariban", + "level1": "Opon-Yukpan", + "level2": "Yukpan" + }, + "yuq": { + "level0": "Tupian", + "level1": "Eastern Tupian", + "level2": "Maweti-Guarani", + "level3": "Aweti-Guarani", + "level4": "Tupi-Guarani", + "level5": "Southern Tupi-Guarani", + "level6": "Tupi-Guarani Subgroup II", + "level7": "Warazu-Sirionoid", + "level8": "Sirionoid" + }, + "yur": { + "level0": "Algic" + }, + "yus": { + "level0": "Bookkeeping" + }, + "yut": { + "level0": "Nuclear Trans New Guinea", + "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Yupna", + "level4": "Kewieng-Bonkiman-Nokopo" + }, + "yuu": { + "level0": "Bookkeeping" + }, + "yuw": { + "level0": "Nuclear Trans New Guinea", 
+ "level1": "Finisterre-Huon", + "level2": "Finisterre-Saruwaged", + "level3": "Uruwa" + }, + "yux": { + "level0": "Yukaghir", + "level1": "Kolymic" + }, + "yuy": { + "level0": "Mongolic-Khitan", + "level1": "Mongolic", + "level2": "Southern Periphery Mongolic" + }, + "yva": { + "level0": "Yawa-Saweru" + }, + "yvt": { + "level0": "Arawakan", + "level1": "Alto Orinoco", + "level2": "Parenic" + }, + "ywa": { + "level0": "Sepik", + "level1": "Sepik Tama", + "level2": "Mayo-Pasi", + "level3": "Yimin-Bel" + }, + "ywg": { + "level0": "Pama-Nyungan", + "level1": "South-West Pama-Nyungan", + "level2": "Pilbara", + "level3": "Ngayarda", + "level4": "Central Ngayarda", + "level5": "Panytyima-Yinhawangka" + }, + "ywl": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": "Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Central-Western Lalo" + }, + "ywm": { + "level0": "Bookkeeping" + }, + "ywn": { + "level0": "Pano-Tacanan", + "level1": "Panoan", + "level2": "Mainline Pano", + "level3": "Pano Nawa", + "level4": "Headwaters Pano", + "level5": "Yaminawa Complex" + }, + "ywq": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu" + }, + "ywr": { + "level0": "Nyulnyulan", + "level1": "Eastern Nyulnyulan", + "level2": "Yawuric" + }, + "ywt": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Lisoid", + "level6": "Nuclear Lisoid", + "level7": "Lisu-Laluba-Lavu", + "level8": "Laluba-Lavu", + "level9": 
"Lalo", + "level10": "Greater Lalo", + "level11": "Core Lalo", + "level12": "Central-Western Lalo" + }, + "ywu": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nesu" + }, + "yww": { + "level0": "Pama-Nyungan", + "level1": "Karnic", + "level2": "Central Karnic", + "level3": "Western Central Karnic", + "level4": "Yandruwandhic" + }, + "yxm": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Northern Pama" + }, + "yym": { + "level0": "Bookkeeping" + }, + "yyu": { + "level0": "Nuclear Torricelli", + "level1": "Wapei-Palei", + "level2": "Central Torricelli", + "level3": "Wapeic", + "level4": "Yau-Yis" + }, + "yyz": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Nisoid", + "level7": "Nuclear Nisoid", + "level8": "Nasu-Nosu", + "level9": "Nesu-Nasu", + "level10": "Nasu-Gepu", + "level11": "Unclassified Nasu-Gepu" + }, + "yzg": { + "level0": "Tai-Kadai", + "level1": "Kadaic", + "level2": "Eastern Kra", + "level3": "Buyang", + "level4": "Northern Buyang" + }, + "yzk": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Ni-Li-Kazhouish", + "level5": "Southeastern Ngwi", + "level6": "Highland Phula", + "level7": "Phowa", + "level8": "Hlepho-Phukha", + "level9": "Khlula-Zokhuo" + }, + "zaa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zab": { + "level0": "Otomanguean", + "level1": 
"Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "zac": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zad": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zae": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zaf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zag": { + "level0": "Saharan", + "level1": "Eastern Saharan" + }, + "zah": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Guruntumic", + "level7": "Tala-Sho-Zangwal", + "level8": "Tala-Zamwar" + }, + "zai": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core 
Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zaj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "East Ruvu", + "level11": "Central East Ruvu", + "level12": "Kutu-Zaramo" + }, + "zak": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "East Nyanza", + "level10": "Nyanza Mara", + "level11": "South Mara", + "level12": "Southwest Mara" + }, + "zal": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Lolo-Burmese", + "level3": "Loloish", + "level4": "Nusoish" + }, + "zam": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "zao": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "zaq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zar": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", 
+ "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Rinconic" + }, + "zas": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zat": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zau": { + "level0": "Sino-Tibetan", + "level1": "Bodic", + "level2": "Bodish", + "level3": "Early Old Tibetan", + "level4": "Western Archaic Tibetan", + "level5": "Kenhatic" + }, + "zav": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zaw": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zax": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zay": { + "level0": "Ta-Ne-Omotic", + "level1": "Ometo", + "level2": "East Ometo" + }, + "zaz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": 
"Southwest South Bauchi", + "level7": "Zakse-Saya" + }, + "zbc": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan", + "level6": "Central-East Berawan" + }, + "zbe": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan", + "level6": "Central-East Berawan" + }, + "zbl": { + "level0": "Artificial Language" + }, + "zbt": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Saluan-Banggai", + "level6": "Western Saluan-Banggai", + "level7": "Saluanic", + "level8": "Batui-Saluan" + }, + "zbu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Gejic" + }, + "zbw": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "North Borneo Malayo-Polynesian", + "level3": "North Sarawakan", + "level4": "Berawan-Lower Baram", + "level5": "Berawan" + }, + "zca": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano" + }, + "zch": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He", + "level8": "Western Hongshui He" + }, + "zdj": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", 
+ "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Coastal NEC Bantu", + "level10": "Mijikenda-Pokomo-Comorian", + "level11": "Comorian Bantu" + }, + "zea": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Germanic", + "level3": "Northwest Germanic", + "level4": "West Germanic", + "level5": "Macro-Dutch", + "level6": "Middle-Modern Dutch", + "level7": "Modern Dutch", + "level8": "Southwestern Dutch", + "level9": "Zeeuwic" + }, + "zeg": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "South Huon Gulf linkage", + "level8": "Buang linkage", + "level9": "Mumeng", + "level10": "Zenag-Patep" + }, + "zeh": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He", + "level8": "Western Hongshui He" + }, + "zem": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Southwest South Bauchi", + "level7": "Zeemic", + "level8": "Nuclear Zeemic" + }, + "zen": { + "level0": "Afro-Asiatic", + "level1": "Berber", + "level2": "Western Berber" + }, + "zga": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Southern Tanzania Highlands Bantu", + "level9": "Kinga-Magoma" + }, + "zgb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": 
"Northern Daic", + "level6": "Northern Tai" + }, + "zgm": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zgn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Yei Zhuang" + }, + "zgr": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "Papuan Tip linkage", + "level6": "Peripheral Papuan Tip linkage", + "level7": "Central Papuan Oceanic", + "level8": "Oumic", + "level9": "Magoric" + }, + "zhb": { + "level0": "Sino-Tibetan", + "level1": "Burmo-Qiangic", + "level2": "Na-Qiangic", + "level3": "Qiangic" + }, + "zhd": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Wenma-Southwestern Tai" + }, + "zhi": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Benue-Congo Plateau", + "level4": "West-Central Benue-Congo Plateau", + "level5": "Northwestern Benue-Congo Plateau", + "level6": "Hyamic", + "level7": "Zhiric" + }, + "zhn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zhw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Wide Grassfields", + "level6": "Narrow Grassfields", + "level7": "Ring", + "level8": "Center-West Ring", + "level9": "West Ring", + "level10": "Aghemic" + }, + "zia": { + "level0": "Nuclear Trans New Guinea", + "level1": "Greater Binanderean", + "level2": "Binanderean", + 
"level3": "North Binanderean" + }, + "zib": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "zik": { + "level0": "Anim", + "level1": "Marind-Boazi-Yaqai", + "level2": "Boazi" + }, + "zil": { + "level0": "Mande", + "level1": "Western Mande", + "level2": "Manding-Kpelle", + "level3": "Southwest Mande", + "level4": "Mende-Loma", + "level5": "Mende-Bandi", + "level6": "Bandi-Zialo" + }, + "zim": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "South Masa" + }, + "zin": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Great Lakes Bantu", + "level9": "West Nyanza" + }, + "ziw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Northeast Savanna Bantu", + "level8": "Northeast Coastal Bantu", + "level9": "Ruvu", + "level10": "West Ruvu", + "level11": "Seuta", + "level12": "Zigua-Nguu" + }, + "ziz": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Biu-Mandara", + "level3": "South Biu-Mandara", + "level4": "Bataic", + "level5": "Gudeic", + "level6": "Gude-Jimi-Zizilivakan" + }, + "zka": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Celebic", + "level3": "Greater Eastern Celebic", + "level4": "Eastern Celebic", + "level5": "Southeastern Celebic", + "level6": "Muna-Buton", + "level7": "Nuclear Muna-Buton", + "level8": "Munan", + "level9": "Munic" + }, + "zkg": { + "level0": "Unclassifiable" + }, + "zko": { + "level0": "Yeniseian" + }, + "zkp": { + "level0": "Nuclear-Macro-Je", + "level1": "Je", + "level2": "Southern Je", + "level3": "Kaingang-Xokleng", + "level4": "Kaingangic" + }, + "zkr": { + "level0": "Sino-Tibetan", + 
"level1": "Kman-Meyor" + }, + "zkt": { + "level0": "Mongolic-Khitan" + }, + "zku": { + "level0": "Pama-Nyungan", + "level1": "Arandic-Thura-Yura", + "level2": "Thura-Yura", + "level3": "Core Thura Yura", + "level4": "Southern Thura-Yura" + }, + "zla": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "Luban", + "level8": "Luba-Kaonde" + }, + "zlj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Lianshan-Liujiang" + }, + "zlm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Greater Riau-Johoric" + }, + "zln": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Lianshan-Liujiang" + }, + "zlq": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Hongshui He" + }, + "zlu": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi West", + "level6": "Northwest South Bauchi", + "level7": "Polci-Luri", + "level8": "Polcic", + "level9": "Zulic" + }, + "zma": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda", + "level2": "Ame-Manda" + }, + "zmb": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + 
"level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Songola-Binja" + }, + "zmc": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric", + "level5": "Margany-Gunya" + }, + "zmd": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zme": { + "level0": "Giimbiyu" + }, + "zmf": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "Kwilu-Ngounie", + "level13": "Kasai-Ngounie" + }, + "zmg": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Maringarr-Matige" + }, + "zmh": { + "level0": "Baining", + "level1": "Unclassified Baining" + }, + "zmi": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Northern Sumatra Malay", + "level6": "Kerinci-Minangkabau", + "level7": "Minangkabauic" + }, + "zmj": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zmk": { + "level0": "Pama-Nyungan", + "level1": "Greater Maric", + "level2": "Guwa-Maric", + "level3": "Maric", + "level4": "Southern Maric" + }, + "zml": { + "level0": "Eastern Daly" + }, + "zmm": { + "level0": "Western Daly", + "level1": "Bringen" + }, + "zmn": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Bantu A-B10-B20-B30", + "level7": "Ngomic", + "level8": "Nuclear Ngomic" + }, + "zmo": { + "level0": "Eastern Jebel", + "level1": "Aka-Kelo-Molo" + }, + "zmp": { + "level0": "Atlantic-Congo", + "level1": 
"Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "West-Coastal Bantu", + "level8": "Nzadic", + "level9": "Lweric", + "level10": "Dingic", + "level11": "Loange-Atlantic", + "level12": "KLC Extended" + }, + "zmq": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Greater Lega", + "level8": "Mituku-Lega", + "level9": "Mitukuic" + }, + "zmr": { + "level0": "Western Daly", + "level1": "Maranunggu-Ame-Manda" + }, + "zms": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic" + }, + "zmt": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Maringarr-Matige" + }, + "zmu": { + "level0": "Pama-Nyungan", + "level1": "Southeastern Pama-Nyungan", + "level2": "New South Wales Pama-Nyungan", + "level3": "Muruwaric" + }, + "zmv": { + "level0": "Pama-Nyungan", + "level1": "Paman", + "level2": "Lamalamic" + }, + "zmw": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Inner Basin Bantu", + "level9": "Keleic", + "level10": "Kele-Poke", + "level11": "So-Poke", + "level12": "So-Lebonya", + "level13": "Lebonya", + "level14": "Bantu D33", + "level15": "Budu-Ndaka-Mbo", + "level16": "Ndaka-Mbo" + }, + "zmx": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": 
"Narrow Bantu", + "level6": "Central-Western Bantu", + "level7": "North Zaire River", + "level8": "Rivers Bantu", + "level9": "Likouala-Sangha", + "level10": "Impfondoic" + }, + "zmy": { + "level0": "Western Daly", + "level1": "Bringen", + "level2": "Marithielic" + }, + "zmz": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Bandaic", + "level6": "Nuclear Bandaic" + }, + "zna": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Bua-Kim-Day", + "level4": "Adamawa Bua", + "level5": "Inland Bua", + "level6": "Goulaic", + "level7": "Zan-Kulaalic" + }, + "zne": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "North Volta-Congo", + "level3": "Cameroun-Ubangian", + "level4": "Ubangi", + "level5": "Zandic", + "level6": "Zande-Nzakara" + }, + "zng": { + "level0": "Austroasiatic", + "level1": "Mangic" + }, + "znk": { + "level0": "Unattested", + "level1": "Iwaidjan Proper (Unattested)" + }, + "zns": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "West Chadic", + "level3": "West Chadic B", + "level4": "West Chadic B.3", + "level5": "South Bauchi East", + "level6": "Boghomic", + "level7": "Kir-Mangas" + }, + "zoc": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zoh": { + "level0": "Mixe-Zoque", + "level1": "Zoque" + }, + "zom": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Peripheral Kuki-Chin", + "level4": "Northeastern Kuki-Chin", + "level5": "Sizangic" + }, + "zoo": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zoq": { + "level0": 
"Mixe-Zoque", + "level1": "Zoque", + "level2": "Gulf Zoque", + "level3": "Texistepec-Ayapa Zoque" + }, + "zor": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zos": { + "level0": "Mixe-Zoque", + "level1": "Zoque", + "level2": "Chiapas-Jitotolteco Zoque", + "level3": "Chiapas Zoque" + }, + "zpa": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Yautepec" + }, + "zpb": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano" + }, + "zpc": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec" + }, + "zpd": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Sierra Juarezic" + }, + "zpe": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Tehuantepec" + }, + "zpf": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + 
"level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zpg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Tehuantepec" + }, + "zph": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec" + }, + "zpi": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zpj": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Transyautepecan", + "level9": "Northeast Yautepec" + }, + "zpk": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Amatecano" + }, + "zpl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec", + "level6": "West-Central West Zapotec" + }, + "zpm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o", + "level9": 
"Mixtepec-Quioquitani-Quieri Zapotec" + }, + "zpn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec", + "level10": "Tilquiapanic" + }, + "zpo": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Amatecano" + }, + "zpp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "West Zapotec", + "level6": "West-Central West Zapotec" + }, + "zpq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zpr": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "zps": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano", + "level8": "Coatlan-Loxicha Zapotec" + }, + "zpt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano" + }, + "zpu": { + 
"level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Cajonosic" + }, + "zpv": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "zpw": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "zpx": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Macrocoatecano", + "level7": "Coatecano", + "level8": "Coatlan-Loxicha Zapotec" + }, + "zpy": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "zpz": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "zqe": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai", + "level7": "Yei Zhuang" + }, + "zrg": { + "level0": "Bookkeeping" + }, + "zrn": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "East Chadic", + "level3": "East Chadic B", + "level4": "East Chadic B.1", + "level5": "Mubic" + }, + "zro": { + "level0": 
"Zaparoan", + "level1": "Zaparo-Abishira" + }, + "zrp": { + "level0": "Bookkeeping" + }, + "zrs": { + "level0": "Mairasic" + }, + "zsa": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "zsl": { + "level0": "Sign Language", + "level1": "L1 Sign Language" + }, + "zsm": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Malayo-Chamic", + "level3": "Malayic", + "level4": "Nuclear Malayic", + "level5": "Standard Malay-Indonesian" + }, + "zsu": { + "level0": "Austronesian", + "level1": "Malayo-Polynesian", + "level2": "Eastern Malayo-Polynesian", + "level3": "Oceanic", + "level4": "Western Oceanic linkage", + "level5": "North New Guinea linkage", + "level6": "Huon Gulf", + "level7": "Markham", + "level8": "Upper Markham", + "level9": "Mountain Upper Markham" + }, + "ztc": { + "level0": "Bookkeeping" + }, + "zte": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Papabuco" + }, + "ztg": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "ztl": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o" + }, + "ztm": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": 
"Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "ztn": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "ztp": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Miahuatecano", + "level8": "Miahuateco" + }, + "ztq": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Cisyautepeque\u00f1o", + "level9": "Mixtepec-Quioquitani-Quieri Zapotec" + }, + "zts": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec", + "level10": "Tilquiapanic" + }, + "ztt": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec" + }, + "ztu": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec" + }, + "ztx": 
{ + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Central Core Zapotec", + "level8": "Western Valley Zapotec", + "level9": "Extended Ocotepec Zapotec" + }, + "zty": { + "level0": "Otomanguean", + "level1": "Eastern Otomanguean", + "level2": "Popoloca-Zapotecan", + "level3": "Zapotecan", + "level4": "Zapotec", + "level5": "Core Zapotec", + "level6": "Narrow Core Zapotec", + "level7": "Northern Core Zapotec", + "level8": "Rinconic" + }, + "zuh": { + "level0": "Nuclear Trans New Guinea", + "level1": "Kainantu-Goroka", + "level2": "Goroka", + "level3": "Nuclear Goroka", + "level4": "Gahuku" + }, + "zul": { + "level0": "Atlantic-Congo", + "level1": "Volta-Congo", + "level2": "Benue-Congo", + "level3": "Bantoid", + "level4": "Southern Bantoid", + "level5": "Narrow Bantu", + "level6": "East Bantu", + "level7": "Southern Bantu", + "level8": "Nuclear Southern Bantu", + "level9": "Dimsuffix Southern Bantu", + "level10": "Nguni-Tsonga-Copi", + "level11": "Nguni (S.40)", + "level12": "Nuclear Nguni", + "level13": "Southern Ndebele-Lowland" + }, + "zum": { + "level0": "Indo-European", + "level1": "Classical Indo-European", + "level2": "Indo-Iranian", + "level3": "Iranian", + "level4": "Southwestern Iranian", + "level5": "Middle-Modern Persian", + "level6": "Modern Southwestern Iranian" + }, + "zuy": { + "level0": "Afro-Asiatic", + "level1": "Chadic", + "level2": "Masa", + "level3": "North Masa", + "level4": "Unclassified North Masa" + }, + "zwa": { + "level0": "Afro-Asiatic", + "level1": "Semitic", + "level2": "West Semitic", + "level3": "Ethiosemitic", + "level4": "South Ethiopic", + "level5": "Harari-East Gurage" + }, + "zyb": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": 
"Yongnan-Yongbei" + }, + "zyg": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Central-Southwestern Tai", + "level5": "Debao-Jingxi-Nung" + }, + "zyj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Northern Tai" + }, + "zyn": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic", + "level6": "Yongnan-Yongbei" + }, + "zyp": { + "level0": "Sino-Tibetan", + "level1": "Kuki-Chin-Naga", + "level2": "Kuki-Chin", + "level3": "Central Kuki-Chin", + "level4": "Maraic", + "level5": "Nuclear Maraic" + }, + "zzj": { + "level0": "Tai-Kadai", + "level1": "Kam-Tai", + "level2": "Daic-Beic", + "level3": "Daic", + "level4": "Northern Daic-Sek", + "level5": "Northern Daic" + } +} \ No newline at end of file diff --git a/mteb/languages.py b/mteb/languages.py index f7f6477503..9b170a707f 100644 --- a/mteb/languages.py +++ b/mteb/languages.py @@ -17,7 +17,7 @@ # Language mappings path_to_lang_codes = Path(__file__).parent / "iso_639_3_to_language.json" path_to_lang_scripts = Path(__file__).parent / "iso_15924_to_script.json" - +path_to_lang_fam = Path(__file__).parent / "language_family.json" with path_to_lang_codes.open("r") as f: ISO_TO_LANGUAGE = json.load(f) @@ -25,6 +25,11 @@ with path_to_lang_scripts.open("r") as f: ISO_TO_SCRIPT = json.load(f) +with path_to_lang_fam.open("r") as f: + ISO_TO_FAM = json.load(f) + +ISO_TO_FAM_LEVEL0 = {k: v["level0"] for k, v in ISO_TO_FAM.items()} + @dataclass class LanguageScripts: diff --git a/scripts/create_language_family_mapping.py b/scripts/create_language_family_mapping.py new file mode 100644 index 0000000000..50700d9654 --- /dev/null +++ b/scripts/create_language_family_mapping.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import json +from 
pathlib import Path + +from pyglottolog.api import Glottolog, lls +from tqdm import tqdm + +glottolog = Glottolog( + "/home/ubuntu/isaac/work/glottolog" +) # Download the Glottolog repository + + +def get_languages_with_iso_by_languoid(languoid, level=0, prev_fam=None): + # Recursively gather all descendant languages with ISO codes + if prev_fam is None: + prev_fam = {} # Start with a fresh dictionary for each top-level languoid + + if not isinstance(languoid, lls.Languoid): + return + + for descendant in languoid.children: + # Create a copy of `prev_fam` to avoid overwriting + current_fam = prev_fam.copy() + current_fam[f"level{level}"] = languoid.name + + if descendant.level.name == "language": # Direct languages + if descendant.iso: + iso_key = descendant.iso + if len(ISO2FAMILY.get(iso_key, {})) > len(current_fam): + continue + ISO2FAMILY[iso_key] = current_fam + elif descendant.level.name == "family": # Subfamilies, recurse + get_languages_with_iso_by_languoid(descendant, level + 1, current_fam) + + +all_languoids = list(glottolog.languoids()) +with Path("language_family.json").open("r") as f: + ISO2FAMILY = json.load(f) + +for languoid in tqdm(all_languoids, total=len(all_languoids)): + get_languages_with_iso_by_languoid(languoid) + +ISO2FAMILY = dict(sorted(ISO2FAMILY.items())) + +with Path("language_family.json").open("w") as f: + json.dump(ISO2FAMILY, f, indent=3) From 377a63d01e19d42d1163c9cc92b26a11ca84bf5d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 28 Nov 2024 12:16:39 +0000 Subject: [PATCH 31/76] Update tasks table --- docs/tasks.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/tasks.md b/docs/tasks.md index 23bb246cf5..0abcf2b8db 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -757,7 +757,7 @@ The following tables give you an overview of the tasks in MTEB. 
| bod | Tibetan | Sino-Tibetan | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | | boj | Anjam | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bon | Bine | Eastern Trans-Fly | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| bos | Bosnian | Unclassified | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| bos | Bosnian | Indo-European | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | box | Buamu | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | boy | Bodo (Central African Republic) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bpr | Koronadal Blaan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -895,7 +895,7 @@ The following tables give you an overview of the tasks in MTEB. | eri | Ogea | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ese | Ese Ejja | Pano-Tacanan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | esk | Northwest Alaska Inupiatun | Eskimo-Aleut | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| est | Estonian | Unclassified | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | +| est | Estonian | Uralic | 2 | 2 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 8 | | etr | Edolo | Bosavi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | eus | Basque | Unclassified | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 8 | | ewe | Ewe | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | @@ -903,7 +903,7 @@ The following tables give you an overview of the tasks in MTEB. 
| fai | Faiwol | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fao | Faroese | Indo-European | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 7 | | far | Fataleka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fas | Persian | Unclassified | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | +| fas | Persian | Indo-European | 1 | 4 | 0 | 0 | 0 | 1 | 2 | 9 | 0 | 0 | 0 | 17 | | ffm | Maasina Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fij | Fijian | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | fil | Filipino | Austronesian | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | @@ -977,7 +977,7 @@ The following tables give you an overview of the tasks in MTEB. | hns | Caribbean Hindustani | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hop | Hopi | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hot | Hote | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hrv | Croatian | Unclassified | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | +| hrv | Croatian | Indo-European | 4 | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 10 | | hsb | Upper Sorbian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hto | Minica Huitoto | Huitotoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hub | Huambisa | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1446,7 +1446,7 @@ The following tables give you an overview of the tasks in MTEB. 
| sri | Siriano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | srm | Saramaccan | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | srn | Sranan Tongo | Indo-European | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| srp | Serbian | Unclassified | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | +| srp | Serbian | Indo-European | 4 | 1 | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 9 | | srq | Sirionó | Tupian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ssd | Siroi | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ssg | Seimat | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1459,7 +1459,7 @@ The following tables give you an overview of the tasks in MTEB. | sus | Susu | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | suz | Sunwar | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | svk | Slovakian Sign Language | Sign Language | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| swa | Swahili (macrolanguage) | Unclassified | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | +| swa | Swahili (macrolanguage) | Atlantic-Congo | 1 | 7 | 2 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 15 | | swe | Swedish | Indo-European | 4 | 8 | 3 | 0 | 1 | 1 | 1 | 4 | 0 | 0 | 0 | 22 | | swg | Swabian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | swh | Swahili (individual language) | Atlantic-Congo | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | From e3d2b548d8df716bd5ab8ef4f080d7cff82d51cf Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Fri, 29 Nov 2024 08:35:51 +0100 Subject: [PATCH 32/76] fix: Ensure that models match the names on embedding-benchmarks/results (#1519) --- mteb/models/openai_models.py | 6 +++--- mteb/models/voyage_models.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 50967e898b..bb8550b79a 100644 --- a/mteb/models/openai_models.py +++ 
b/mteb/models/openai_models.py @@ -56,7 +56,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: text_embedding_3_small = ModelMeta( - name="text-embedding-3-small", + name="openai/text-embedding-3-small", revision="1", release_date="2024-01-25", languages=None, # supported languages not specified @@ -73,7 +73,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: use_instructions=False, ) text_embedding_3_large = ModelMeta( - name="text-embedding-3-large", + name="openai/text-embedding-3-large", revision="1", release_date="2024-01-25", languages=None, # supported languages not specified @@ -87,7 +87,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: memory_usage=None, ) text_embedding_ada_002 = ModelMeta( - name="text-embedding-ada-002", + name="openai/text-embedding-ada-002", revision="1", release_date="2022-12-15", languages=None, # supported languages not specified diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index ea6b25bde1..3cf8b286e1 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -141,7 +141,7 @@ def _batched_encode( } voyage_large_2_instruct = ModelMeta( - name="voyage-large-2-instruct", + name="voyageai/voyage-large-2-instruct", revision="1", release_date="2024-05-05", languages=None, # supported languages not specified @@ -163,7 +163,7 @@ def _batched_encode( ) voyage_finance_2 = ModelMeta( - name="voyage-finance-2", + name="voyageai/voyage-finance-2", revision="1", release_date="2024-05-30", languages=None, # supported languages not specified @@ -185,7 +185,7 @@ def _batched_encode( ) voyage_law_2 = ModelMeta( - name="voyage-law-2", + name="voyageai/voyage-law-2", revision="1", release_date="2024-04-15", languages=None, # supported languages not specified @@ -207,7 +207,7 @@ def _batched_encode( ) voyage_code_2 = ModelMeta( - name="voyage-code-2", + name="voyageai/voyage-code-2", revision="1", release_date="2024-01-23", languages=None, # supported languages not specified 
@@ -251,7 +251,7 @@ def _batched_encode( ) voyage_2 = ModelMeta( - name="voyage-2", + name="voyageai/voyage-2", revision="1", release_date="2023-10-29", languages=None, # supported languages not specified @@ -272,7 +272,7 @@ def _batched_encode( use_instructions=False, ) voyage_multilingual_2 = ModelMeta( - name="voyage-multilingual-2", + name="voyageai/voyage-multilingual-2", revision="1", release_date="2024-06-10", languages=None, # supported languages not specified From 9980c609075369bc3a8b1c9ed2926942fc73735f Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 29 Nov 2024 07:58:47 +0000 Subject: [PATCH 33/76] 1.20.5 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 333232ed9a..196c5f4985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.4" +version = "1.20.5" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From b02ae826bd512d8c28afb185fa856ca76e90fc0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Fri, 29 Nov 2024 11:11:39 +0100 Subject: [PATCH 34/76] fix: Adding missing metadata on models and mathcing names up with the results repo (#1528) * Added Voyage 3 models * Added correct metadata to Cohere models and matched names with the results repo --- mteb/models/cohere_models.py | 169 +++++++++++++++++++++++++++++++++-- mteb/models/voyage_models.py | 44 +++++++++ 2 files changed, 208 insertions(+), 5 deletions(-) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 26eb5e92ed..ec86d2d1b1 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -11,6 +11,118 @@ from .wrapper import Wrapper +supported_languages = [ + "afr-Latn", + "amh-Ethi", + "ara-Arab", + "asm-Beng", + "aze-Latn", + "bel-Cyrl", + "bul-Cyrl", + "ben-Beng", + "bod-Tibt", + 
"bos-Latn", + "cat-Latn", + "ceb-Latn", + "cos-Latn", + "ces-Latn", + "cym-Latn", + "dan-Latn", + "deu-Latn", + "ell-Grek", + "eng-Latn", + "epo-Latn", + "spa-Latn", + "est-Latn", + "eus-Latn", + "fas-Arab", + "fin-Latn", + "fra-Latn", + "fry-Latn", + "gle-Latn", + "gla-Latn", + "glg-Latn", + "guj-Gujr", + "hau-Latn", + "haw-Latn", + "heb-Hebr", + "hin-Deva", + "hmn-Latn", + "hrv-Latn", + "hat-Latn", + "hun-Latn", + "hye-Armn", + "ind-Latn", + "ibo-Latn", + "isl-Latn", + "ita-Latn", + "jpn-Jpan", + "jav-Latn", + "kat-Geor", + "kaz-Cyrl", + "khm-Khmr", + "kan-Knda", + "kor-Kore", + "kur-Arab", + "kir-Cyrl", + "lat-Latn", + "ltz-Latn", + "lao-Laoo", + "lit-Latn", + "lav-Latn", + "mlg-Latn", + "mri-Latn", + "mkd-Cyrl", + "mal-Mlym", + "mon-Cyrl", + "mar-Deva", + "msa-Latn", + "mlt-Latn", + "mya-Mymr", + "nep-Deva", + "nld-Latn", + "nor-Latn", + "nya-Latn", + "ori-Orya", + "pan-Guru", + "pol-Latn", + "por-Latn", + "ron-Latn", + "rus-Cyrl", + "kin-Latn", + "sin-Sinh", + "slk-Latn", + "slv-Latn", + "smo-Latn", + "sna-Latn", + "som-Latn", + "sqi-Latn", + "srp-Cyrl", + "sot-Latn", + "sun-Latn", + "swe-Latn", + "swa-Latn", + "tam-Taml", + "tel-Telu", + "tgk-Cyrl", + "tha-Thai", + "tuk-Latn", + "tgl-Latn", + "tur-Latn", + "tat-Cyrl", + "uig-Arab", + "ukr-Cyrl", + "urd-Arab", + "uzb-Latn", + "vie-Latn", + "wol-Latn", + "xho-Latn", + "yid-Hebr", + "yor-Latn", + "zho-Hans", + "zul-Latn", +] + # Implementation follows https://github.com/KennethEnevoldsen/scandinavian-embedding-benchmark/blob/main/src/seb/registered_models/cohere_models.py class CohereTextEmbeddingModel(Wrapper): @@ -79,15 +191,16 @@ def encode( model_name="embed-multilingual-v3.0", model_prompts=model_prompts, ), - name="embed-multilingual-v3.0", - languages=[], # Unknown, but support >100 languages + name="Cohere/Cohere-embed-multilingual-v3.0", + languages=supported_languages, open_weights=False, revision="1", release_date="2023-11-02", n_parameters=None, memory_usage=None, max_tokens=None, - embed_dim=1024, + 
embed_dim=512, + reference="https://cohere.com/blog/introducing-embed-v3", license=None, similarity_fn_name="cosine", framework=["API"], @@ -100,17 +213,63 @@ def encode( model_name="embed-multilingual-v3.0", model_prompts=model_prompts, ), - name="embed-english-v3.0", + name="Cohere/Cohere-embed-english-v3.0", languages=["eng-Latn"], open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", revision="1", release_date="2023-11-02", n_parameters=None, memory_usage=None, - max_tokens=None, + max_tokens=512, embed_dim=1024, license=None, similarity_fn_name="cosine", framework=["API"], use_instructions=False, ) + + +cohere_mult_light_3 = ModelMeta( + loader=partial( + CohereTextEmbeddingModel, + model_name="embed-multilingual-light-v3.0", + model_prompts=model_prompts, + ), + name="Cohere/Cohere-embed-multilingual-light-v3.0", + languages=supported_languages, + open_weights=False, + revision="1", + reference="https://cohere.com/blog/introducing-embed-v3", + release_date="2023-11-02", + n_parameters=None, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) + +cohere_eng_light_3 = ModelMeta( + loader=partial( + CohereTextEmbeddingModel, + model_name="embed-english-light-v3.0", + model_prompts=model_prompts, + ), + name="Cohere/Cohere-embed-english-light-v3.0", + languages=["eng-Latn"], + open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", + revision="1", + release_date="2023-11-02", + n_parameters=None, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 3cf8b286e1..9f42808b37 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -292,3 +292,47 @@ def _batched_encode( framework=["API"], use_instructions=False, ) + 
+voyage_3 = ModelMeta( + name="voyageai/voyage-3", + revision="1", + release_date="2024-09-18", + languages=None, # supported languages not specified + loader=partial( + VoyageWrapper, + model_name="voyage-3", + model_prompts=model_prompts, + ), + max_tokens=32000, + embed_dim=1024, + open_weights=False, + n_parameters=None, + memory_usage=None, + license=None, + reference="https://blog.voyageai.com/2024/09/18/voyage-3/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) + +voyage_3_lite = ModelMeta( + name="voyageai/voyage-3-lite", + revision="1", + release_date="2024-09-18", + languages=None, # supported languages not specified + loader=partial( + VoyageWrapper, + model_name="voyage-3-lite", + model_prompts=model_prompts, + ), + max_tokens=32000, + embed_dim=512, + open_weights=False, + n_parameters=None, + memory_usage=None, + license=None, + reference="https://blog.voyageai.com/2024/09/18/voyage-3/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) From ba09b11d27ec2d047cf395a6239530698e888b4f Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 29 Nov 2024 10:26:32 +0000 Subject: [PATCH 35/76] 1.20.6 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 196c5f4985..23a2bf8af6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.5" +version = "1.20.6" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 8e1225047d4eed79484c00440fe3f801c512eca5 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 29 Nov 2024 15:06:51 +0200 Subject: [PATCH 36/76] feat: Evaluate missing splits (#1525) * fix: evaluate missing splits (#1268) * implement partial evaluation for missing splits * lint * requested changes done from scratch * test for missing split evaluation 
added * uncomment test * lint * avoid circular import * use TaskResult * skip tests for now --------- Co-authored-by: Isaac Chung * got test_all_splits_evaluated passing * tests passing * address review comments * make lint * handle None cases for kg_co2_emissions * use new results info --------- Co-authored-by: Thivyanth --- mteb/evaluation/MTEB.py | 145 +++++++++++++++--- tests/test_benchmark/mock_tasks.py | 38 ++++- .../test_evaluation/test_split_evaluation.py | 91 +++++++++++ 3 files changed, 248 insertions(+), 26 deletions(-) create mode 100644 tests/test_evaluation/test_split_evaluation.py diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 1374c1ce11..433001b696 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -5,7 +5,7 @@ import os import traceback from collections.abc import Iterable -from copy import copy +from copy import copy, deepcopy from datetime import datetime from itertools import chain from pathlib import Path @@ -15,6 +15,7 @@ import datasets from sentence_transformers import CrossEncoder, SentenceTransformer +from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import model_meta_from_sentence_transformers @@ -84,6 +85,8 @@ def __init__( self._version = version self.err_logs_path = err_logs_path + self.last_evaluated_splits = {} + self.select_tasks(**kwargs) def deprecation_warning( @@ -307,6 +310,70 @@ def _run_eval( tock = time() return results, tick, tock + @staticmethod + def _get_missing_splits( + existing_results: TaskResult | None, task_eval_splits: list[str] + ) -> list[str]: + if existing_results is None: + return task_eval_splits + + missing_splits = [] + for split in task_eval_splits: + if split not in existing_results.scores: + missing_splits.append(split) + elif not existing_results.scores[ + split + ]: # Check if the split has any scores + missing_splits.append(split) + + return missing_splits + + 
@staticmethod + def _merge_results( + existing_results: TaskResult, new_results: TaskResult + ) -> TaskResult: + merged_scores = existing_results.scores.copy() + + for split, scores in new_results.scores.items(): + if split in merged_scores: + merged_scores[split] = MTEB._merge_split_scores( + merged_scores[split], scores + ) + else: + merged_scores[split] = scores + + existing_kg_co2_emissions = ( + existing_results.kg_co2_emissions + if existing_results.kg_co2_emissions + else 0 + ) + new_kg_co2_emissions = ( + new_results.kg_co2_emissions if new_results.kg_co2_emissions else 0 + ) + merged_kg_co2_emissions = None + if existing_kg_co2_emissions and new_kg_co2_emissions: + merged_kg_co2_emissions = existing_kg_co2_emissions + new_kg_co2_emissions + merged_results = TaskResult( + dataset_revision=new_results.dataset_revision, + task_name=new_results.task_name, + mteb_version=new_results.mteb_version, + scores=merged_scores, + evaluation_time=existing_results.evaluation_time + + new_results.evaluation_time, + kg_co2_emissions=merged_kg_co2_emissions, + ) + + return merged_results + + @staticmethod + def _merge_split_scores( + existing_scores: list[ScoresDict], new_scores: list[ScoresDict] + ) -> list[ScoresDict]: + merged = {score["hf_subset"]: score for score in existing_scores} + for score in new_scores: + merged[score["hf_subset"]] = score + return list(merged.values()) + def run( self, model: SentenceTransformer | Encoder, @@ -378,30 +445,51 @@ def run( original_tasks = ( self.tasks.copy() ) # save them in case we re-use the object (e.g. for reranking) + + # To evaluate missing splits, we keep track of the task name and the corresponding splits. 
+ self.last_evaluated_splits = {} + while len(self.tasks) > 0: task = self.tasks[0] logger.info( f"\n\n********************** Evaluating {task.metadata.name} **********************" ) - # skip evaluation if results folder exists and overwrite_results is False if output_path: save_path = output_path / f"{task.metadata.name}{task.save_suffix}.json" - if save_path.exists() and not overwrite_results: - logger.info( - f"{task.metadata.name} results already exists. Loading results from disk. Set overwrite_results=True to overwrite." - ) - mteb_results = TaskResult.from_disk(save_path) - evaluation_results.append(mteb_results) - del self.tasks[0] # empty memory - continue - try: + existing_results = None + if save_path.exists(): + existing_results = TaskResult.from_disk(save_path) + + if not overwrite_results: + logger.info( + f"{task.metadata.name} results already exists. Loading results from disk. Set overwrite_results=True to overwrite." + ) + evaluation_results.append(existing_results) + del self.tasks[0] # empty memory + continue + task_eval_splits = ( eval_splits if eval_splits is not None else task.eval_splits ) + missing_splits = self._get_missing_splits( + existing_results, task_eval_splits + ) + + if not missing_splits and existing_results: + evaluation_results.append(existing_results) + + # no splits are evaluated. 
+ self.last_evaluated_splits[task.metadata.name] = [] + del self.tasks[0] + continue + + if missing_splits: + logger.info( + f"Running evaluation for missing splits: {missing_splits}" + ) - # load data - logger.info(f"Loading dataset for {task.metadata_dict['name']}") + try: task.check_if_dataset_is_superseeded() task.load_data(eval_splits=task_eval_splits, **kwargs) @@ -409,7 +497,10 @@ def run( task_results = {} evaluation_time = 0 kg_co2_emissions: int | None = 0 if co2_tracker else None - for split in task_eval_splits: + + self.last_evaluated_splits[task.metadata.name] = [] + + for split in missing_splits: if co2_tracker: try: from codecarbon import EmissionsTracker @@ -443,6 +534,8 @@ def run( **kwargs, ) + self.last_evaluated_splits[task.metadata.name].append(split) + logger.info( f"Evaluation for {task.metadata_dict['name']} on {split} took {tock - tick:.2f} seconds" ) @@ -452,21 +545,22 @@ def run( if verbosity >= 1: logger.info(f"Scores: {results}") - mteb_task_result = TaskResult.from_task_results( + new_results = TaskResult.from_task_results( task, task_results, evaluation_time=evaluation_time, kg_co2_emissions=kg_co2_emissions, ) - # save results + if existing_results: + merged_results = self._merge_results(existing_results, new_results) + else: + merged_results = new_results + if output_path: - with open(save_path, "w") as f_out: - json.dump( - mteb_task_result.to_dict(), f_out, indent=2, sort_keys=True - ) + merged_results.to_disk(save_path) - evaluation_results.append(mteb_task_result) + evaluation_results.append(merged_results) except Exception as e: logger.error( @@ -485,7 +579,6 @@ def run( # empty memory del self.tasks[0] - # restore original tasks self.tasks = original_tasks return evaluation_results @@ -536,3 +629,11 @@ def _save_model_metadata(model_meta: ModelMeta, output_folder: Path) -> None: with save_path.open("w") as f: json.dump(model_meta.to_dict(), f) + + def get_last_evaluated_splits(self): + """Returns a dictionary of tasks and 
their evaluated splits from the most recent run. + Tasks with empty lists indicate that results already existed and no splits were evaluated. + """ + return deepcopy( + {task: list(splits) for task, splits in self.last_evaluated_splits.items()} + ) diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 489b67ab43..1442902288 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -1284,14 +1284,32 @@ class MockRetrievalTask(AbsTaskRetrieval): "average_relevant_docs_per_query": 2.0, "max_relevant_docs_per_query": 2, "unique_relevant_docs": 2, - } + }, + "val": { + "number_of_characters": 112, + "num_samples": 4, + "num_queries": 2, + "num_documents": 2, + "min_document_length": 23, + "average_document_length": 26.0, + "max_document_length": 29, + "unique_documents": 2, + "min_query_length": 27, + "average_query_length": 30.0, + "max_query_length": 33, + "unique_queries": 2, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 2.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + }, } metadata = TaskMetadata( type="Retrieval", name="MockRetrievalTask", main_score="ndcg_at_10", - **general_args, # type: ignore + **dict(general_args | {"eval_splits": ["val", "test"]}), # type: ignore ) def load_data(self, **kwargs): @@ -1299,13 +1317,21 @@ def load_data(self, **kwargs): "test": { "q1": "This is a test sentence", "q2": "This is another test sentence", - } + }, + "val": { + "q1": "This is a test sentence", + "q2": "This is another test sentence", + }, } self.corpus = { "test": { "d1": "This is a positive sentence", "d2": "This is another positive sentence", - } + }, + "val": { + "d1": "This is a positive sentence", + "d2": "This is another positive sentence", + }, } self.relevant_docs = { @@ -1313,6 +1339,10 @@ def load_data(self, **kwargs): "q1": {"d1": 1, "d2": 0}, "q2": {"d1": 0, "d2": 1}, }, + "val": { + "q1": {"d1": 1, "d2": 0}, + "q2": {"d1": 0, "d2": 1}, + 
}, } self.data_loaded = True diff --git a/tests/test_evaluation/test_split_evaluation.py b/tests/test_evaluation/test_split_evaluation.py new file mode 100644 index 0000000000..a2ca249747 --- /dev/null +++ b/tests/test_evaluation/test_split_evaluation.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import pytest + +from mteb import MTEB +from tests.test_benchmark.mock_models import ( + MockSentenceTransformer, +) +from tests.test_benchmark.mock_tasks import ( + MockRetrievalTask, +) + + +@pytest.fixture +def model(): + return MockSentenceTransformer() + + +@pytest.fixture +def tasks(): + return [MockRetrievalTask()] + + +def test_all_splits_evaluated(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + results = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "all_splits_evaluated"), + verbosity=2, + ) + + assert "MockRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val", "test"} + assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + + +def test_one_missing_split(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + results = evaluation.run( + model, + eval_splits=["val"], + output_folder=str(tmp_path / "testcase2"), + verbosity=2, + ) + + assert "MockRetrievalTask" == results[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"val"} + assert len(last_evaluated_splits["MockRetrievalTask"]) == 1 + + results2 = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "testcase2"), + verbosity=2, + overwrite_results=True, + ) + + assert "MockRetrievalTask" == results2[0].task_name + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert set(last_evaluated_splits["MockRetrievalTask"]) == {"test"} + assert 
len(last_evaluated_splits["MockRetrievalTask"]) == 1 + + +def test_no_missing_splits(model, tasks, tmp_path): + evaluation = MTEB(tasks=tasks) + _ = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "testcase3"), + verbosity=2, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockRetrievalTask"]) == 2 + + evaluation = MTEB(tasks=tasks) + _ = evaluation.run( + model, + eval_splits=["val", "test"], + output_folder=str(tmp_path / "testcase3"), + verbosity=2, + overwrite_results=True, + ) + + last_evaluated_splits = evaluation.get_last_evaluated_splits() + assert "MockRetrievalTask" in last_evaluated_splits + assert len(last_evaluated_splits["MockRetrievalTask"]) == 0 From ee1edac2e4286b955fc4741427a0867e57a1b64d Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 29 Nov 2024 13:21:27 +0000 Subject: [PATCH 37/76] 1.21.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 23a2bf8af6..d526570b51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.20.6" +version = "1.21.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 343b6e055f1fe6784f3fcf9d99e830101bb3e16f Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 30 Nov 2024 10:57:51 +0200 Subject: [PATCH 38/76] fix: Correct typos superseeded -> superseded (#1532) fix typo -> superseded --- mteb/abstasks/AbsTask.py | 6 +++--- mteb/evaluation/MTEB.py | 2 +- mteb/leaderboard/table.py | 2 +- mteb/overview.py | 12 ++++++------ tests/test_overview.py | 8 ++++---- tests/test_tasks/test_all_abstasks.py | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py 
index 8b9edfd52c..4bc64c86bf 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -72,11 +72,11 @@ def __init__(self, seed: int = 42, **kwargs: Any): torch.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) - def check_if_dataset_is_superseeded(self): - """Check if the dataset is superseeded by a newer version""" + def check_if_dataset_is_superseded(self): + """Check if the dataset is superseded by a newer version""" if self.superseded_by: logger.warning( - f"Dataset '{self.metadata.name}' is superseeded by '{self.superseded_by}', you might consider using the newer version of the dataset." + f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset." ) def dataset_transform(self): diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 433001b696..29f7ba5f61 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -490,7 +490,7 @@ def run( ) try: - task.check_if_dataset_is_superseeded() + task.check_if_dataset_is_superseded() task.load_data(eval_splits=task_eval_splits, **kwargs) # run evaluation diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index 734de2c238..c6b69785bf 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -101,7 +101,7 @@ def get_means_per_types(df: pd.DataFrame) -> pd.DataFrame: def failsafe_get_model_meta(model_name): try: return get_model_meta(model_name) - except Exception as e: + except Exception: return None diff --git a/mteb/overview.py b/mteb/overview.py index 7b1bfbb426..43f8cebc1b 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -57,7 +57,7 @@ def check_is_valid_language(lang: str) -> None: ) -def filter_superseeded_datasets(tasks: list[AbsTask]) -> list[AbsTask]: +def filter_superseded_datasets(tasks: list[AbsTask]) -> list[AbsTask]: return [t for t in tasks if t.superseded_by is None] @@ -230,7 +230,7 @@ def get_tasks( task_types: list[TASK_TYPE] | None = None, 
categories: list[TASK_CATEGORY] | None = None, tasks: list[str] | None = None, - exclude_superseeded: bool = True, + exclude_superseded: bool = True, eval_splits: list[str] | None = None, ) -> MTEBTasks: """Get a list of tasks based on the specified filters. @@ -245,7 +245,7 @@ def get_tasks( categories: A list of task categories these include "s2s" (sentence to sentence), "s2p" (sentence to paragraph) and "p2p" (paragraph to paragraph). tasks: A list of task names to include. If None, all tasks which pass the filters are included. - exclude_superseeded: A boolean flag to exclude datasets which are superseeded by another. + exclude_superseded: A boolean flag to exclude datasets which are superseded by another. eval_splits: A list of evaluation splits to include. If None, all splits are included. Returns: @@ -254,7 +254,7 @@ def get_tasks( Examples: >>> get_tasks(languages=["eng", "deu"], script=["Latn"], domains=["Legal"]) >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Classification"]) - >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Clustering"], exclude_superseeded=False) + >>> get_tasks(languages=["eng"], script=["Latn"], task_types=["Clustering"], exclude_superseded=False) >>> get_tasks(languages=["eng"], tasks=["WikipediaRetrievalMultilingual"], eval_splits=["test"]) """ if tasks: @@ -278,8 +278,8 @@ def get_tasks( _tasks = filter_tasks_by_task_types(_tasks, task_types) if categories: _tasks = filter_task_by_categories(_tasks, categories) - if exclude_superseeded: - _tasks = filter_superseeded_datasets(_tasks) + if exclude_superseded: + _tasks = filter_superseded_datasets(_tasks) return MTEBTasks(_tasks) diff --git a/tests/test_overview.py b/tests/test_overview.py index 73df5dc193..127e54f279 100644 --- a/tests/test_overview.py +++ b/tests/test_overview.py @@ -37,20 +37,20 @@ def test_get_task(task_name: str, eval_splits: list[str] | None): @pytest.mark.parametrize("script", [["Latn"], ["Cyrl"], None]) 
@pytest.mark.parametrize("domains", [["Legal"], ["Medical", "Non-fiction"], None]) @pytest.mark.parametrize("task_types", [["Classification"], ["Clustering"], None]) -@pytest.mark.parametrize("exclude_superseeded_datasets", [True, False]) +@pytest.mark.parametrize("exclude_superseded_datasets", [True, False]) def test_get_tasks( languages: list[str], script: list[str], domains: list[TASK_DOMAIN], task_types: list[TASK_TYPE] | None, - exclude_superseeded_datasets: bool, + exclude_superseded_datasets: bool, ): tasks = mteb.get_tasks( languages=languages, script=script, domains=domains, task_types=task_types, - exclude_superseeded=exclude_superseeded_datasets, + exclude_superseded=exclude_superseded_datasets, ) for task in tasks: @@ -65,7 +65,7 @@ def test_get_tasks( assert set(domains).intersection(set(task_domains)) if task_types: assert task.metadata.type in task_types - if exclude_superseeded_datasets: + if exclude_superseded_datasets: assert task.superseded_by is None diff --git a/tests/test_tasks/test_all_abstasks.py b/tests/test_tasks/test_all_abstasks.py index 6c00a2d5e0..20eff8c434 100644 --- a/tests/test_tasks/test_all_abstasks.py +++ b/tests/test_tasks/test_all_abstasks.py @@ -90,10 +90,10 @@ def test_dataset_availability(): asyncio.run(check_datasets_are_available_on_hf(tasks)) -def test_superseeded_dataset_exists(): - tasks = mteb.get_tasks(exclude_superseeded=False) +def test_superseded_dataset_exists(): + tasks = mteb.get_tasks(exclude_superseded=False) for task in tasks: if task.superseded_by: assert ( task.superseded_by in TASKS_REGISTRY - ), f"{task} is superseeded by {task.superseded_by} but {task.superseded_by} is not in the TASKS_REGISTRY" + ), f"{task} is superseded by {task.superseded_by} but {task.superseded_by} is not in the TASKS_REGISTRY" From e949d2ac95079de988b00de6c1138e34ac8a4c80 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 30 Nov 2024 09:12:45 +0000 Subject: [PATCH 39/76] 1.21.1 Automatically generated by 
python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d526570b51..bfd21af78a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.0" +version = "1.21.1" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 5b6f20fe6fbe7673480fbb8c36402ddbe7e203a2 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sun, 1 Dec 2024 16:26:32 +0200 Subject: [PATCH 40/76] fix: Task load data error for SICK-BR-STS and XStance (#1534) * fix task load data for two tasks * correct dataset keys --- .../PairClassification/multilingual/XStance.py | 4 ++-- mteb/tasks/STS/por/SickBrSTS.py | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/mteb/tasks/PairClassification/multilingual/XStance.py b/mteb/tasks/PairClassification/multilingual/XStance.py index 515e598940..03d4f066e7 100644 --- a/mteb/tasks/PairClassification/multilingual/XStance.py +++ b/mteb/tasks/PairClassification/multilingual/XStance.py @@ -100,8 +100,8 @@ def dataset_transform(self): for split in self.metadata.eval_splits: _dataset[lang][split] = [ { - "sent1": self.dataset[lang][split]["sent1"], - "sent2": self.dataset[lang][split]["sent2"], + "sentence1": self.dataset[lang][split]["sentence1"], + "sentence2": self.dataset[lang][split]["sentence2"], "labels": self.dataset[lang][split]["labels"], } ] diff --git a/mteb/tasks/STS/por/SickBrSTS.py b/mteb/tasks/STS/por/SickBrSTS.py index 7f42fadd80..5298ab5437 100644 --- a/mteb/tasks/STS/por/SickBrSTS.py +++ b/mteb/tasks/STS/por/SickBrSTS.py @@ -60,14 +60,12 @@ def metadata_dict(self) -> dict[str, str]: return metadata_dict def dataset_transform(self): - for split in self.dataset: - self.dataset.update( - { - split: self.dataset[split].train_test_split( - test_size=N_SAMPLES, seed=self.seed, label="entailment_label" - )["test"] - } - ) + 
self.dataset = self.stratified_subsampling( + self.dataset, + seed=42, + splits=self.metadata.eval_splits, + label="entailment_label", + ) self.dataset = self.dataset.rename_columns( { From ec9413a021cf4390f422232753388b45998da43a Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 1 Dec 2024 14:44:02 +0000 Subject: [PATCH 41/76] 1.21.2 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bfd21af78a..7561932403 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.1" +version = "1.21.2" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 39349ff4bc565bc60fa33adc0916c68eee4eb182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Mon, 2 Dec 2024 14:46:31 +0100 Subject: [PATCH 42/76] fix: Proprietary models now get correctly shown in leaderboard (#1530) * Fixed showing proprietary models in leaderboard * Added links to all OpenAI models * Fixed table formatting issues * Bumped Gradio version --- mteb/leaderboard/app.py | 8 ++++--- mteb/leaderboard/table.py | 44 ++++++++++++++++++++++++------------ mteb/models/openai_models.py | 2 ++ pyproject.toml | 2 +- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index c62788fb3e..c4e5e80efd 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -5,6 +5,7 @@ from pathlib import Path import gradio as gr +import pandas as pd from gradio_rangeslider import RangeSlider import mteb @@ -316,12 +317,13 @@ def update_scores( domains=domains, ) lower, upper = model_size - # Multiplying by millions - lower = lower * 1e6 - upper = upper * 1e6 # Setting to None, when the user doesn't specify anything if (lower == min_model_size) and (upper == max_model_size): lower, upper = None, None + else: + 
# Multiplying by millions + lower = lower * 1e6 + upper = upper * 1e6 benchmark_results = benchmark_results.filter_models( open_weights=availability, use_instructions=instructions, diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index c6b69785bf..b3215dd067 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -32,7 +32,7 @@ def format_scores(score: float) -> float: def format_n_parameters(n_parameters) -> str: if (n_parameters is None) or (not int(n_parameters)): - return "" + return "Unknown" n_thousand = int(n_parameters // 1e3) if n_thousand < 1: return str(int(n_parameters)) @@ -46,9 +46,7 @@ def format_n_parameters(n_parameters) -> str: def split_on_capital(s: str) -> str: """Splits on capital letters and joins with spaces""" - if all(c.isupper() for c in s): - return s - return " ".join(re.findall("[A-Z][^A-Z]*", s)) + return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s)) def get_column_widths(df: pd.DataFrame) -> list[str]: @@ -59,9 +57,12 @@ def get_column_widths(df: pd.DataFrame) -> list[str]: value_lengths = [len(f"{value:.2f}") for value in df[column_name]] else: value_lengths = [len(str(value)) for value in df[column_name]] - max_length = max(max(column_word_lengths), max(value_lengths)) - n_pixels = 25 + (max_length * 10) - widths.append(f"{n_pixels}px") + try: + max_length = max(max(column_word_lengths), max(value_lengths)) + n_pixels = 35 + (max_length * 12.5) + widths.append(f"{n_pixels}px") + except Exception: + widths.append("50px") return widths @@ -138,19 +139,20 @@ def scores_to_tables( joint_table.insert(1, "mean_by_task_type", typed_mean) joint_table["borda_rank"] = get_borda_rank(per_task) joint_table = joint_table.reset_index() - joint_table = joint_table.drop(columns=["model_revision"]) model_metas = joint_table["model_name"].map(failsafe_get_model_meta) joint_table = joint_table[model_metas.notna()] joint_table["model_link"] = model_metas.map(lambda m: m.reference) joint_table.insert( 1, 
"Max Tokens", - model_metas.map(lambda m: str(int(m.max_tokens)) if m.max_tokens else ""), + model_metas.map( + lambda m: str(int(m.max_tokens)) if m.max_tokens else "Unknown" + ), ) joint_table.insert( 1, "Embedding Dimensions", - model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else ""), + model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"), ) joint_table.insert( 1, @@ -158,14 +160,22 @@ def scores_to_tables( model_metas.map(lambda m: format_n_parameters(m.n_parameters)), ) joint_table = joint_table.sort_values("borda_rank", ascending=True) + per_task = per_task.loc[ + joint_table.set_index(["model_name", "model_revision"]).index + ] + joint_table = joint_table.drop(columns=["model_revision"]) # Removing HF organization from model joint_table["model_name"] = joint_table["model_name"].map( lambda name: name.split("/")[-1] ) # Adding markdown link to model names - joint_table["model_name"] = ( - "[" + joint_table["model_name"] + "](" + joint_table.pop("model_link") + ")" + name_w_link = ( + "[" + joint_table["model_name"] + "](" + joint_table["model_link"] + ")" ) + joint_table["model_name"] = joint_table["model_name"].mask( + joint_table["model_link"].notna(), name_w_link + ) + joint_table = joint_table.drop(columns=["model_link"]) joint_table = joint_table.rename( columns={ "model_name": "Model", @@ -184,6 +194,7 @@ def scores_to_tables( ) joint_table.insert(0, "Rank (Borda)", joint_table.pop("borda_rank")) column_widths = get_column_widths(joint_table) + task_column_widths = get_column_widths(per_task) # overriding for model name column_widths[1] = "250px" column_types = get_column_types(joint_table) @@ -206,9 +217,12 @@ def scores_to_tables( return ( gr.DataFrame( joint_table_style, - # column_widths=column_widths, + column_widths=column_widths, datatype=column_types, - # wrap=True, + interactive=False, + wrap=True, + ), + gr.DataFrame( + per_task_style, column_widths=task_column_widths, interactive=False ), - 
gr.DataFrame(per_task_style), ) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index bb8550b79a..268fc8cd55 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -81,6 +81,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: max_tokens=8191, embed_dim=3072, open_weights=False, + reference="https://openai.com/index/new-embedding-models-and-api-updates/", framework=["API"], use_instructions=False, n_parameters=None, @@ -92,6 +93,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray: release_date="2022-12-15", languages=None, # supported languages not specified loader=partial(OpenAIWrapper, model_name="text-embedding-ada-002"), + reference="https://openai.com/index/new-and-improved-embedding-model/", max_tokens=8191, embed_dim=1536, open_weights=False, diff --git a/pyproject.toml b/pyproject.toml index 7561932403..8f4595afc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ dev = ["ruff==0.6.4", # locked so we don't get PRs which fail only due to a lint codecarbon = ["codecarbon"] speedtask = ["GPUtil>=1.4.0", "psutil>=5.9.8"] peft = ["peft>=0.11.0"] -leaderboard = ["gradio>=5.5.0", "gradio_rangeslider>=0.0.8"] +leaderboard = ["gradio>=5.7.1", "gradio_rangeslider>=0.0.8"] flagembedding = ["FlagEmbedding"] jina = ["einops>=0.8.0"] flash_attention = ["flash-attn>=2.6.3"] From d07c29b4d63124366e8e7e654dc32363896ded97 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 2 Dec 2024 14:03:34 +0000 Subject: [PATCH 43/76] 1.21.3 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f4595afc1..3ce2d4a5ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.2" +version = "1.21.3" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 
5fa7b7b1c450db2ff8a5402e38cce0046600b538 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 2 Dec 2024 19:35:27 +0200 Subject: [PATCH 44/76] docs: Add Model Meta parameters and metadata (#1536) * add multi_qa_MiniLM_L6_cos_v1 model meta * add all_mpnet_base_v2 * add parameters to model meta * make lint * add extra params to meta --- mteb/model_meta.py | 4 +++ mteb/models/sentence_transformers_models.py | 40 ++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 4a8146b3de..0191eb58c5 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -74,6 +74,8 @@ class ModelMeta(BaseModel): input such as "query: {document}" or "passage: {document}". zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models are evaluated non-zero-shot unless specified otherwise. + adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. + supersedes: Name of the model that this model supersedes, e.g. nvidia/NV-Embed-v2 supersedes v1. 
""" model_config = ConfigDict(extra="forbid") @@ -96,6 +98,8 @@ class ModelMeta(BaseModel): similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None zero_shot_benchmarks: list[str] | None = None + adapted_from: str | None = None + supersedes: str | None = None def to_dict(self): dict_repr = self.model_dump() diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 7a3116e667..78458369d1 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -70,7 +70,7 @@ memory_usage=None, embed_dim=384, license="apache-2.0", - max_tokens=512, + max_tokens=256, reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], @@ -127,3 +127,41 @@ framework=["Sentence Transformers", "PyTorch"], use_instructions=False, ) + +multi_qa_MiniLM_L6_cos_v1 = ModelMeta( + name="sentence-transformer/multi-qa-MiniLM-L6-cos-v1", + languages=["eng-Latn"], + open_weights=True, + revision="b207367332321f8e44f96e224ef15bc607f4dbf0", # can be any + release_date="2021-08-30", + n_parameters=22_700_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + supersedes=None, + adapted_from=None, +) + +all_mpnet_base_v2 = ModelMeta( + name="sentence-transformers/all-mpnet-base-v2", + languages=["eng-Latn"], + open_weights=True, + revision="9a3225965996d404b775526de6dbfe85d3368642", # can be any + release_date="2021-08-30", + n_parameters=109_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=384, + reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2", + similarity_fn_name="cosine", + 
framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + supersedes="sentence-transformers/all-mpnet-base-v1", + adapted_from=None, +) From 36bab4d345686be0c5c91a2e67c051a286e369a3 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Wed, 4 Dec 2024 09:47:25 +0200 Subject: [PATCH 45/76] fix: add more model meta (jina, e5) (#1537) * add e5 model meta * address review comments --- mteb/model_meta.py | 4 +- mteb/models/e5_models.py | 60 +++++++++++++- mteb/models/sentence_transformers_models.py | 88 ++++++++++++++++++++- 3 files changed, 145 insertions(+), 7 deletions(-) diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 0191eb58c5..83653ec3d1 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -75,7 +75,7 @@ class ModelMeta(BaseModel): zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models are evaluated non-zero-shot unless specified otherwise. adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. - supersedes: Name of the model that this model supersedes, e.g. nvidia/NV-Embed-v2 supersedes v1. + superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1. 
""" model_config = ConfigDict(extra="forbid") @@ -99,7 +99,7 @@ class ModelMeta(BaseModel): use_instructions: bool | None = None zero_shot_benchmarks: list[str] | None = None adapted_from: str | None = None - supersedes: str | None = None + superseded_by: str | None = None def to_dict(self): dict_repr = self.model_dump() diff --git a/mteb/models/e5_models.py b/mteb/models/e5_models.py index 612130ed65..79b84b993f 100644 --- a/mteb/models/e5_models.py +++ b/mteb/models/e5_models.py @@ -238,15 +238,17 @@ open_weights=True, revision="1c644c92ad3ba1efdad3f1451a637716616a20e8", release_date=E5_PAPER_RELEASE_DATE, - n_parameters=278_000_000, + n_parameters=109_000_000, memory_usage=None, embed_dim=768, license="mit", - max_tokens=514, + max_tokens=512, reference="https://huggingface.co/intfloat/e5-base-v2", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + superseded_by=None, + adapted_from=None, ) e5_eng_large_v2 = ModelMeta( @@ -261,7 +263,7 @@ open_weights=True, revision="b322e09026e4ea05f42beadf4d661fb4e101d311", release_date=E5_PAPER_RELEASE_DATE, - n_parameters=560_000_000, + n_parameters=335_000_000, memory_usage=None, embed_dim=1024, license="mit", @@ -270,4 +272,56 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + superseded_by=None, + adapted_from=None, +) + +e5_large = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="intfloat/e5-large", + revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", + model_prompts=model_prompts, + ), + name="intfloat/e5-large", + languages=["eng-Latn"], + open_weights=True, + revision="4dc6d853a804b9c8886ede6dda8a073b7dc08a81", + release_date="2022-12-26", + n_parameters=335_000_000, + memory_usage=None, + embed_dim=1024, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/intfloat/e5-large", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + 
use_instructions=True, + superseded_by="intfloat/e5-large-v2", + adapted_from=None, +) + +e5_base = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="intfloat/e5-base", + revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", + model_prompts=model_prompts, + ), + name="intfloat/e5-base", + languages=["eng-Latn"], + open_weights=True, + revision="b533fe4636f4a2507c08ddab40644d20b0006d6a", + release_date="2022-12-26", + n_parameters=109_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/intfloat/e5-base", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + superseded_by="intfloat/e5-base-v2", + adapted_from=None, ) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 78458369d1..0311ef81f4 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -75,6 +75,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, ) paraphrase_multilingual_MiniLM_L12_v2 = ModelMeta( @@ -92,6 +94,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, ) paraphrase_multilingual_mpnet_base_v2 = ModelMeta( @@ -109,6 +113,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, ) labse = ModelMeta( @@ -126,6 +132,8 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, + superseded_by=None, + adapted_from=None, ) multi_qa_MiniLM_L6_cos_v1 = ModelMeta( @@ -143,7 +151,7 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - supersedes=None, + superseded_by=None, 
adapted_from=None, ) @@ -162,6 +170,82 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - supersedes="sentence-transformers/all-mpnet-base-v1", + superseded_by=None, + adapted_from=None, +) + +jina_embeddings_v2_base_en = ModelMeta( + name="jinaai/jina-embeddings-v2-base-en", + languages=["eng-Latn"], + open_weights=True, + revision="6e85f575bc273f1fd840a658067d0157933c83f0", # can be any + release_date="2023-09-27", + n_parameters=137_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=8192, + reference="https://huggingface.co/jinaai/jina-embeddings-v2-base-en", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, +) + +jina_embeddings_v2_small_en = ModelMeta( + name="jinaai/jina-embeddings-v2-small-en", + languages=["eng-Latn"], + open_weights=True, + revision="", # can be any + release_date="2023-09-27", + n_parameters=32_700_000, + memory_usage=None, + embed_dim=512, + license="apache-2.0", + max_tokens=8192, + reference="https://huggingface.co/jinaai/jina-embeddings-v2-small-en", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by=None, + adapted_from=None, +) + +jina_embedding_b_en_v1 = ModelMeta( + name="jinaai/jina-embedding-b-en-v1", + languages=["eng-Latn"], + open_weights=True, + revision="aa0645035294a8c0607ce5bb700aba982cdff32c", # can be any + release_date="2023-07-07", + n_parameters=110_000_000, + memory_usage=None, + embed_dim=768, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/jinaai/jina-embedding-b-en-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by="jinaai/jina-embeddings-v2-base-en", + adapted_from=None, +) + +jina_embedding_s_en_v1 = ModelMeta( + name="jinaai/jina-embedding-s-en-v1", 
+ languages=["eng-Latn"], + open_weights=True, + revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", # can be any + release_date="2023-07-07", + n_parameters=35_000_000, + memory_usage=None, + embed_dim=512, + license="apache-2.0", + max_tokens=512, + reference="https://huggingface.co/jinaai/jina-embedding-s-en-v1", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + superseded_by="jinaai/jina-embeddings-v2-small-en", adapted_from=None, ) From ac4a706e2c3ba97d3de969bd2f36507e769b034e Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 4 Dec 2024 10:20:38 +0000 Subject: [PATCH 46/76] 1.21.4 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3ce2d4a5ef..b783ec80ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.3" +version = "1.21.4" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From c2f4c2649114380345115e338a63b26880dd4963 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 4 Dec 2024 11:39:28 +0100 Subject: [PATCH 47/76] Add cohere models (#1538) * fix: bug cohere names * format --- mteb/leaderboard/app.py | 1 - mteb/load_results/benchmark_results.py | 4 ++-- mteb/models/cohere_models.py | 3 +-- pyproject.toml | 1 + 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/mteb/leaderboard/app.py b/mteb/leaderboard/app.py index c4e5e80efd..c51dc7a502 100644 --- a/mteb/leaderboard/app.py +++ b/mteb/leaderboard/app.py @@ -5,7 +5,6 @@ from pathlib import Path import gradio as gr -import pandas as pd from gradio_rangeslider import RangeSlider import mteb diff --git a/mteb/load_results/benchmark_results.py b/mteb/load_results/benchmark_results.py index 756024a4e6..25a332e2cc 100644 --- a/mteb/load_results/benchmark_results.py +++ 
b/mteb/load_results/benchmark_results.py @@ -5,7 +5,7 @@ from collections import defaultdict from collections.abc import Iterable from pathlib import Path -from typing import Any, Callable, Literal, Optional +from typing import Any, Callable, Literal import numpy as np import pandas as pd @@ -229,7 +229,7 @@ def filter_models( return type(self).model_construct(model_results=new_model_results) def join_revisions(self): - def parse_version(version_str: str) -> Optional[Version]: + def parse_version(version_str: str) -> Version | None: try: return Version(version_str) except (InvalidVersion, TypeError): diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index ec86d2d1b1..2ed0b76a97 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -210,7 +210,7 @@ def encode( cohere_eng_3 = ModelMeta( loader=partial( CohereTextEmbeddingModel, - model_name="embed-multilingual-v3.0", + model_name="embed-english-v3.0", model_prompts=model_prompts, ), name="Cohere/Cohere-embed-english-v3.0", @@ -229,7 +229,6 @@ def encode( use_instructions=False, ) - cohere_mult_light_3 = ModelMeta( loader=partial( CohereTextEmbeddingModel, diff --git a/pyproject.toml b/pyproject.toml index b783ec80ad..10154edc89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,7 @@ ignore = ["E501", # line too long "D107", # Missing docstring in __init__ "D205", # 1 blank line required between summary line and description "D415", # First line should end with a period + "C408", # don't use unecc. collection call, e.g. 
dict over {} ] [tool.ruff.lint.flake8-implicit-str-concat] From 5013df813621c179f08b5db52450ac9acd18514d Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 4 Dec 2024 11:41:31 +0100 Subject: [PATCH 48/76] fix: add nomic models (#1543) #1515 --- mteb/models/nomic_models.py | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index 0600f01be0..5039afa64c 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -87,6 +87,17 @@ def encode( # type: ignore open_weights=True, revision="b0753ae76394dd36bcfb912a46018088bca48be0", release_date="2024-02-10", # first commit + n_parameters=137_000_000, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + adapted_from=None, + supersedes="nomic-ai/nomic-embed-text-v1", ) nomic_embed_v1 = ModelMeta( @@ -111,4 +122,59 @@ def encode( # type: ignore similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=True, + adapted_from=None, + supersedes=None, +) + +nomic_embed_v1_ablated = ModelMeta( + loader=partial( # type: ignore + NomicWrapper, + trust_remote_code=True, + model_name="nomic-ai/nomic-embed-text-v1-ablated", + revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", + model_prompts=model_prompts, + ), + name="nomic-ai/nomic-embed-text-v1-ablated", + languages=["eng-Latn"], + open_weights=True, + revision="7d948905c5d5d3874fa55a925d68e49dbf411e5f", + release_date="2024-01-15", # first commit + n_parameters=None, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-ablated", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, 
+ adapted_from=None, + supersedes=None, +) + + +nomic_embed_v1_ablated = ModelMeta( + loader=partial( # type: ignore + NomicWrapper, + trust_remote_code=True, + model_name="nomic-ai/nomic-embed-text-v1-unsupervised", + revision="b53d557b15ae63852847c222d336c1609eced93c", + model_prompts=model_prompts, + ), + name="nomic-ai/nomic-embed-text-v1-unsupervised", + languages=["eng-Latn"], + open_weights=True, + revision="b53d557b15ae63852847c222d336c1609eced93c", + release_date="2024-01-15", # first commit + n_parameters=None, + memory_usage=None, + max_tokens=8192, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/nomic-ai/nomic-embed-text-v1-unsupervised", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + adapted_from=None, + supersedes=None, ) From 97ab2721e5bb73bcf5ed6352366ab333234532f7 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 4 Dec 2024 11:42:20 +0100 Subject: [PATCH 49/76] fix: Added all-minilm-l12-v2 (#1542) #1515 --- mteb/models/sentence_transformers_models.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index 0311ef81f4..f2473e6116 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -249,3 +249,23 @@ superseded_by="jinaai/jina-embeddings-v2-small-en", adapted_from=None, ) + + +all_MiniLM_L12_v2 = ModelMeta( + name="sentence-transformers/all-MiniLM-L12-v2", + languages=["eng-Latn"], + open_weights=True, + revision="364dd28d28dcd3359b537f3cf1f5348ba679da62", # can be any + release_date="2021-08-30", + n_parameters=33_400_000, + memory_usage=None, + embed_dim=384, + license="apache-2.0", + max_tokens=256, + reference="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=False, + 
supersedes="sentence-transformers/all-MiniLM-L12-v1", + adapted_from=None, +) From df11c382eb79d6d1b4e9b9a350a14524808f645f Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Wed, 4 Dec 2024 11:43:40 +0100 Subject: [PATCH 50/76] fix: Added arctic models (#1541) #1515 --- mteb/models/arctic_models.py | 130 ++++++++++++++++++++++++++++++++++- mteb/models/overview.py | 2 + 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index 5f3d41a97e..6e7141b011 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -22,9 +22,135 @@ n_parameters=109_000_000, memory_usage=None, max_tokens=512, - embed_dim=256, + embed_dim=768, license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5", - similarity_fn_name="cosine_similarity", + similarity_fn_name="cosine", use_instructions=False, + adapted_from=None, + supersedes="Snowflake/snowflake-arctic-embed-m", +) + + +arctic_embed_xs = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-xs", + revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e", + ), + name="Snowflake/snowflake-arctic-embed-xs", + revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e", + release_date="2024-07-08", # initial commit of hf model. 
+ languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=22_600_000, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="sentence-transformers/all-MiniLM-L6-v2", + supersedes=None, +) + + +arctic_embed_s = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-s", + revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f", + ), + name="Snowflake/snowflake-arctic-embed-s", + revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f", + release_date="2024-04-12", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=32_200_000, + memory_usage=None, + max_tokens=512, + embed_dim=384, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-s", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-small-unsupervised", + supersedes=None, +) + + +arctic_embed_m = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m", + revision="cc17beacbac32366782584c8752220405a0f3f40", + ), + name="Snowflake/snowflake-arctic-embed-m", + revision="cc17beacbac32366782584c8752220405a0f3f40", + release_date="2024-04-12", # initial commit of hf model. 
+ languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=512, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-base-unsupervised", + supersedes=None, +) + +arctic_embed_m_long = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-m-long", + revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1", + ), + name="Snowflake/snowflake-arctic-embed-m-long", + revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1", + release_date="2024-04-12", # initial commit of hf model. + languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=2048, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-long", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", + supersedes=None, +) + + +arctic_embed_l = ModelMeta( + loader=partial( + sentence_transformers_loader, + model_name="Snowflake/snowflake-arctic-embed-l", + revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c", + ), + name="Snowflake/snowflake-arctic-embed-l", + revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c", + release_date="2024-04-12", # initial commit of hf model. 
+ languages=["eng_Latn"], + open_weights=True, + framework=["Sentence Transformers", "PyTorch"], + n_parameters=109_000_000, + memory_usage=None, + max_tokens=512, + embed_dim=768, + license="apache-2.0", + reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l", + similarity_fn_name="cosine", + use_instructions=False, + adapted_from="intfloat/e5-base-unsupervised", + supersedes=None, ) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index f54a085d02..8341e42cdb 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -11,6 +11,7 @@ from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import ( + arctic_models, bge_models, bm25, cohere_models, @@ -39,6 +40,7 @@ logger = logging.getLogger(__name__) model_modules = [ + arctic_models, bge_models, bm25, cohere_models, From 37fdfa1e4ef3d4247589ee52adfb2374bf1ee8a5 Mon Sep 17 00:00:00 2001 From: Youngjoon Jang <82500463+yjoonjang@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:54:54 +0900 Subject: [PATCH 51/76] fix: add sentence trimming to OpenAIWrapper (#1526) * fix: add sentence trimming to OpenAIWrapper * fix: import tiktoken library inside encode function * fix: check tokenizer library installed and update ModelMeta to pass tokenizer_name * fix: pass tokenizer_name, max_tokens to loader * fix: make tokenizer_name None for default * fix: delete changes for ModelMeta * fix: fix revision to 2 for OpenAI models * fix: add docstring for OpenAIWrapper * fix: lint * feat: add openai optional dependency set * fix: add sleep for too many requests * fix: add lint * fix: delete evaluate file --- mteb/models/openai_models.py | 98 ++++++++++++++++++++++++++++++------ pyproject.toml | 1 + 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index 268fc8cd55..ca2b32b2a2 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -15,16 +15,37 @@ class 
OpenAIWrapper(Wrapper): - def __init__(self, model_name: str, embed_dim: int | None = None, **kwargs) -> None: + def __init__( + self, + model_name: str, + max_tokens: int, + tokenizer_name: str = "cl100k_base", # since all models use this tokenizer now + embed_dim: int | None = None, + **kwargs, + ) -> None: + """Wrapper for OpenAIs embedding API. + To handle documents larger than 8192 tokens, we truncate the document to the specified sequence length. + """ requires_package(self, "openai", "Openai text embedding") from openai import OpenAI + requires_package(self, "tiktoken", "Tiktoken package") + import tiktoken + self._client = OpenAI() self._model_name = model_name self._embed_dim = embed_dim + self._max_tokens = max_tokens + self._encoding = tiktoken.get_encoding(tokenizer_name) + + def truncate_text_tokens(self, text): + """Truncate a string to have `max_tokens` according to the given encoding.""" + truncated_sentence = self._encoding.encode(text)[: self._max_tokens] + return self._encoding.decode(truncated_sentence) def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: requires_package(self, "openai", "Openai text embedding") + from openai import NotGiven if self._model_name == "text-embedding-ada-002" and self._embed_dim is not None: @@ -32,21 +53,53 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: "Reducing embedding size available only for text-embedding-3-* models" ) + trimmed_sentences = [] + for sentence in sentences: + encoded_sentence = self._encoding.encode(sentence) + if len(encoded_sentence) > self._max_tokens: + truncated_sentence = self.truncate_text_tokens(sentence) + trimmed_sentences.append(truncated_sentence) + else: + trimmed_sentences.append(sentence) + max_batch_size = 2048 sublists = [ - sentences[i : i + max_batch_size] - for i in range(0, len(sentences), max_batch_size) + trimmed_sentences[i : i + max_batch_size] + for i in range(0, len(trimmed_sentences), max_batch_size) ] all_embeddings = [] for 
sublist in sublists: - response = self._client.embeddings.create( - input=sublist, - model=self._model_name, - encoding_format="float", - dimensions=self._embed_dim or NotGiven(), - ) + try: + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) + except Exception as e: + # Sleep due to too many requests + logger.info("Sleeping for 10 seconds due to error", e) + import time + + time.sleep(10) + try: + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) + except Exception as e: + logger.info("Sleeping for 60 seconds due to error", e) + time.sleep(60) + response = self._client.embeddings.create( + input=sublist, + model=self._model_name, + encoding_format="float", + dimensions=self._embed_dim or NotGiven(), + ) all_embeddings.extend(self._to_numpy(response)) return np.array(all_embeddings) @@ -57,10 +110,15 @@ def _to_numpy(self, embedding_response) -> np.ndarray: text_embedding_3_small = ModelMeta( name="openai/text-embedding-3-small", - revision="1", + revision="2", release_date="2024-01-25", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-3-small"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-3-small", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), max_tokens=8191, embed_dim=1536, open_weights=False, @@ -74,10 +132,15 @@ def _to_numpy(self, embedding_response) -> np.ndarray: ) text_embedding_3_large = ModelMeta( name="openai/text-embedding-3-large", - revision="1", + revision="2", release_date="2024-01-25", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-3-large"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-3-large", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), 
max_tokens=8191, embed_dim=3072, open_weights=False, @@ -89,10 +152,15 @@ def _to_numpy(self, embedding_response) -> np.ndarray: ) text_embedding_ada_002 = ModelMeta( name="openai/text-embedding-ada-002", - revision="1", + revision="2", release_date="2022-12-15", languages=None, # supported languages not specified - loader=partial(OpenAIWrapper, model_name="text-embedding-ada-002"), + loader=partial( + OpenAIWrapper, + model_name="text-embedding-ada-002", + tokenizer_name="cl100k_base", + max_tokens=8192, + ), reference="https://openai.com/index/new-and-improved-embedding-model/", max_tokens=8191, embed_dim=1536, diff --git a/pyproject.toml b/pyproject.toml index 10154edc89..ff488292c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ leaderboard = ["gradio>=5.7.1", "gradio_rangeslider>=0.0.8"] flagembedding = ["FlagEmbedding"] jina = ["einops>=0.8.0"] flash_attention = ["flash-attn>=2.6.3"] +openai = ["openai>=1.41.0", "tiktoken>=0.8.0"] [tool.coverage.report] From 1e621842d45e2cd61a0615b1f002c491ba3dedd4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 4 Dec 2024 13:19:23 +0000 Subject: [PATCH 52/76] 1.21.5 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ff488292c0..49e6f21998 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.4" +version = "1.21.5" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From a44a46c3541f4187e692e3a5dd81e3ec9ef9c4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Wed, 4 Dec 2024 14:23:34 +0100 Subject: [PATCH 53/76] fix: Fixed metadata errors (#1547) --- mteb/models/arctic_models.py | 12 ++++++------ mteb/models/nomic_models.py | 8 ++++---- mteb/models/sentence_transformers_models.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) 
diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index 6e7141b011..9ac70fd638 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -28,7 +28,7 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from=None, - supersedes="Snowflake/snowflake-arctic-embed-m", + superseded_by=None, ) @@ -53,7 +53,7 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from="sentence-transformers/all-MiniLM-L6-v2", - supersedes=None, + superseded_by=None, ) @@ -78,7 +78,7 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from="intfloat/e5-small-unsupervised", - supersedes=None, + superseded_by=None, ) @@ -103,7 +103,7 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from="intfloat/e5-base-unsupervised", - supersedes=None, + superseded_by="Snowflake/snowflake-arctic-embed-m-v1.5", ) arctic_embed_m_long = ModelMeta( @@ -127,7 +127,7 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", - supersedes=None, + superseded_by=None, ) @@ -152,5 +152,5 @@ similarity_fn_name="cosine", use_instructions=False, adapted_from="intfloat/e5-base-unsupervised", - supersedes=None, + superseded_by=None, ) diff --git a/mteb/models/nomic_models.py b/mteb/models/nomic_models.py index 5039afa64c..254d79dd5a 100644 --- a/mteb/models/nomic_models.py +++ b/mteb/models/nomic_models.py @@ -97,7 +97,7 @@ def encode( # type: ignore framework=["Sentence Transformers", "PyTorch"], use_instructions=True, adapted_from=None, - supersedes="nomic-ai/nomic-embed-text-v1", + superseded_by=None, ) nomic_embed_v1 = ModelMeta( @@ -123,7 +123,7 @@ def encode( # type: ignore framework=["Sentence Transformers", "PyTorch"], use_instructions=True, adapted_from=None, - supersedes=None, + superseded_by="nomic-ai/nomic-embed-text-v1.5", ) nomic_embed_v1_ablated = ModelMeta( @@ -149,7 +149,7 @@ def encode( # type: ignore framework=["Sentence Transformers", "PyTorch"], 
use_instructions=True, adapted_from=None, - supersedes=None, + superseded_by=None, ) @@ -176,5 +176,5 @@ def encode( # type: ignore framework=["Sentence Transformers", "PyTorch"], use_instructions=True, adapted_from=None, - supersedes=None, + superseded_by=None, ) diff --git a/mteb/models/sentence_transformers_models.py b/mteb/models/sentence_transformers_models.py index f2473e6116..c2a36ac855 100644 --- a/mteb/models/sentence_transformers_models.py +++ b/mteb/models/sentence_transformers_models.py @@ -266,6 +266,6 @@ similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], use_instructions=False, - supersedes="sentence-transformers/all-MiniLM-L12-v1", + superseded_by=None, adapted_from=None, ) From d713525824ab94e6269dad6dab2911c9bf71296c Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 4 Dec 2024 13:41:40 +0000 Subject: [PATCH 54/76] 1.21.6 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 49e6f21998..c79112605a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.5" +version = "1.21.6" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 279a4ee5fb6cec07c2d4e85800e51c975fa5a45d Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Thu, 5 Dec 2024 10:09:12 +0100 Subject: [PATCH 55/76] fix: remove curev1 from multlingual (#1552) Seems like it was added here: https://github.com/embeddings-benchmark/mteb/commit/1cc6c9e0fe62ca4e77708b641823fa1a121f048b --- mteb/benchmarks/benchmarks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 9aaefda3cb..d5efbc092f 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -725,7 +725,6 @@ def load_results( "SpartQA", "TempReasonL1", "TRECCOVID", - "CUREv1", 
"WinoGrande", "BelebeleRetrieval", "MLQARetrieval", From e33973550c0dd5cd599de9661e67e99ecdf53ff4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 5 Dec 2024 09:14:00 +0000 Subject: [PATCH 56/76] 1.21.7 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c79112605a..e8ea909f23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.6" +version = "1.21.7" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 2ee8d44e9ed994860ceae100fab186a209411f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Fri, 6 Dec 2024 12:00:41 +0100 Subject: [PATCH 57/76] fix: Add Model2vec (#1546) * Added Model2Vec wrapper * Added Model2vec models * Added model2vec models to registry * Added model2vec as a dependency * Ran linting * Update mteb/models/model2vec_models.py Co-authored-by: Kenneth Enevoldsen * Update mteb/models/model2vec_models.py Co-authored-by: Kenneth Enevoldsen * Added adapted_from and superseeded_by to model2vec models. 
* Added missing import * Moved pyproject.toml to optional dependencies * Fixed typos * Added import error and changed model to model_name * Added Numpy to frameworks * Added Numpy to frameworks * Corrected false info on model2vec models * Replaced np.inf with maxint * Update mteb/models/model2vec_models.py Co-authored-by: Isaac Chung * Added option to have infinite max tokens, added it to Model2vec --------- Co-authored-by: Kenneth Enevoldsen Co-authored-by: Isaac Chung --- mteb/leaderboard/table.py | 12 +- mteb/load_results/task_results.py | 4 +- mteb/model_meta.py | 3 +- mteb/models/model2vec_models.py | 209 ++++++++++++++++++++++++++++++ mteb/models/overview.py | 2 + pyproject.toml | 1 + 6 files changed, 225 insertions(+), 6 deletions(-) create mode 100644 mteb/models/model2vec_models.py diff --git a/mteb/leaderboard/table.py b/mteb/leaderboard/table.py index b3215dd067..9856493c74 100644 --- a/mteb/leaderboard/table.py +++ b/mteb/leaderboard/table.py @@ -106,6 +106,14 @@ def failsafe_get_model_meta(model_name): return None +def format_max_tokens(max_tokens: float | None) -> str: + if max_tokens is None: + return "Unknown" + if max_tokens == np.inf: + return "Infinite" + return str(int(max_tokens)) + + def scores_to_tables( scores_long: list[dict], search_query: str | None = None ) -> tuple[gr.DataFrame, gr.DataFrame]: @@ -145,9 +153,7 @@ def scores_to_tables( joint_table.insert( 1, "Max Tokens", - model_metas.map( - lambda m: str(int(m.max_tokens)) if m.max_tokens else "Unknown" - ), + model_metas.map(lambda m: format_max_tokens(m.max_tokens)), ) joint_table.insert( 1, diff --git a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index b3b1f8cba2..cd2a2c0847 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -511,8 +511,8 @@ def validate_and_filter_scores(self, task: AbsTask | None = None) -> AbsTask: new_scores[split].append(_scores) seen_subsets.add(_scores["hf_subset"]) if seen_subsets != 
hf_subsets: - raise ValueError( - f"Missing subsets {hf_subsets - seen_subsets} for split {split}" + logger.warning( + f"{task.metadata.name}: Missing subsets {hf_subsets - seen_subsets} for split {split}" ) seen_splits.add(split) if seen_splits != set(splits): diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 83653ec3d1..3f2cc2843f 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -25,6 +25,7 @@ "TensorFlow", "API", "Tevatron", + "NumPy", ] DISTANCE_METRICS = Literal["cosine"] @@ -87,7 +88,7 @@ class ModelMeta(BaseModel): loader: Callable[..., Encoder] | None = None n_parameters: int | None = None memory_usage: float | None = None - max_tokens: int | None = None + max_tokens: float | None = None embed_dim: int | None = None license: str | None = None open_weights: bool | None = None diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py new file mode 100644 index 0000000000..0dfdb193cd --- /dev/null +++ b/mteb/models/model2vec_models.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import logging +from collections.abc import Sequence +from functools import partial +from typing import Any + +import numpy as np + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta + +from .wrapper import Wrapper + +logger = logging.getLogger(__name__) + + +class Model2VecWrapper(Wrapper): + def __init__( + self, + model_name: str, + **kwargs, + ) -> None: + """Wrapper for Model2Vec models. + + Args: + model_name: The Model2Vec model to load from HuggingFace Hub. + """ + try: + from model2vec import StaticModel + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`." 
+ ) from e + + self.model_name = model_name + self.static_model = StaticModel.from_pretrained(self.model_name) + + def encode( + self, + sentences: Sequence[str], + **kwargs: Any, + ) -> np.ndarray: + """Encodes the given sentences using the encoder. + + Args: + sentences: The sentences to encode. + **kwargs: Additional arguments to pass to the encoder. + + Returns: + The encoded sentences. + """ + return self.static_model.encode(sentences) + + +m2v_base_glove_subword = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_glove_subword", + ), + name="minishlab/M2V_base_glove_subword", + languages=["eng_Latn"], + open_weights=True, + revision="5f4f5ca159b7321a8b39739bba0794fa0debddf4", + release_date="2024-09-21", + n_parameters=103 * 1e6, + max_tokens=np.inf, # Theoretically infinite + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_glove_subword", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + + +m2v_base_glove = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_glove", + ), + name="minishlab/M2V_base_glove", + languages=["eng_Latn"], + open_weights=True, + revision="38ebd7f10f71e67fa8db898290f92b82e9cfff2b", + release_date="2024-09-21", + n_parameters=102 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_glove", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +m2v_base_output = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_base_output", + ), + name="minishlab/M2V_base_output", + languages=["eng_Latn"], + open_weights=True, + revision="02460ae401a22b09d2c6652e23371398329551e2", + release_date="2024-09-21", + n_parameters=7.56 * 1e6, + max_tokens=np.inf, + embed_dim=256, + 
license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_base_output", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +m2v_multilingual_output = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/M2V_multilingual_output", + ), + name="minishlab/M2V_multilingual_output", + languages=["eng_Latn"], + open_weights=True, + revision="2cf4ec4e1f51aeca6c55cf9b93097d00711a6305", + release_date="2024-09-21", + n_parameters=128 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/M2V_multilingual_output", + use_instructions=False, + adapted_from="sentence-transformers/LaBSE", + superseded_by=None, +) + +potion_base_2m = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-2M", + ), + name="minishlab/potion-base-2M", + languages=["eng_Latn"], + open_weights=True, + revision="86db093558fbced2072b929eb1690bce5272bd4b", + release_date="2024-10-29", + n_parameters=2 * 1e6, + max_tokens=np.inf, + embed_dim=64, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-2M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +potion_base_4m = ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-4M", + ), + name="minishlab/potion-base-4M", + languages=["eng_Latn"], + open_weights=True, + revision="81b1802ada41afcd0987a37dc15e569c9fa76f04", + release_date="2024-10-29", + n_parameters=3.78 * 1e6, + max_tokens=np.inf, + embed_dim=128, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-4M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) + +potion_base_8m = 
ModelMeta( + loader=partial( + Model2VecWrapper, + model_name="minishlab/potion-base-8M", + ), + name="minishlab/potion-base-8M", + languages=["eng_Latn"], + open_weights=True, + revision="dcbec7aa2d52fc76754ac6291803feedd8c619ce", + release_date="2024-10-29", + n_parameters=7.56 * 1e6, + max_tokens=np.inf, + embed_dim=256, + license="mit", + similarity_fn_name="cosine", + framework=["NumPy"], + reference="https://huggingface.co/minishlab/potion-base-8M", + use_instructions=False, + adapted_from="BAAI/bge-base-en-v1.5", + superseded_by=None, +) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 8341e42cdb..7418ee98fa 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -22,6 +22,7 @@ gte_models, jina_models, llm2vec_models, + model2vec_models, mxbai_models, nomic_models, openai_models, @@ -51,6 +52,7 @@ gte_models, llm2vec_models, mxbai_models, + model2vec_models, nomic_models, openai_models, ru_sentence_models, diff --git a/pyproject.toml b/pyproject.toml index e8ea909f23..f978c01789 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ flagembedding = ["FlagEmbedding"] jina = ["einops>=0.8.0"] flash_attention = ["flash-attn>=2.6.3"] openai = ["openai>=1.41.0", "tiktoken>=0.8.0"] +model2vec = ["model2vec>=0.3.0"] [tool.coverage.report] From 29058133f8b0750ec0c66c48495c9028bd7e2616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20Kardos?= Date: Fri, 6 Dec 2024 12:04:18 +0100 Subject: [PATCH 58/76] Made result loading more permissive, changed eval splits for HotPotQA and DBPedia (#1554) * Removed train and dev from eval splits on HotpotQA * Removed dev from eval splits on DBPedia * Made task_results validation more permissive * Readded exception in get_score * Ran linting --- mteb/load_results/task_results.py | 4 +++- mteb/tasks/Retrieval/eng/DBPediaRetrieval.py | 2 +- mteb/tasks/Retrieval/eng/HotpotQARetrieval.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/mteb/load_results/task_results.py b/mteb/load_results/task_results.py index cd2a2c0847..0c999bd805 100644 --- a/mteb/load_results/task_results.py +++ b/mteb/load_results/task_results.py @@ -516,7 +516,9 @@ def validate_and_filter_scores(self, task: AbsTask | None = None) -> AbsTask: ) seen_splits.add(split) if seen_splits != set(splits): - raise ValueError(f"Missing splits {set(splits) - seen_splits}") + logger.warning( + f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}" + ) new_res = {**self.to_dict(), "scores": new_scores} new_res = TaskResult.from_validated(**new_res) return new_res diff --git a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py index 38527d2483..77c0020aa0 100644 --- a/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/eng/DBPediaRetrieval.py @@ -17,7 +17,7 @@ class DBPedia(AbsTaskRetrieval): type="Retrieval", category="s2p", modalities=["text"], - eval_splits=["dev", "test"], + eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=("2017-01-01", "2017-01-01"), # best guess: based on publication date diff --git a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py index b2bdb31455..2a347d9a05 100644 --- a/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/eng/HotpotQARetrieval.py @@ -20,7 +20,7 @@ class HotpotQA(AbsTaskRetrieval): type="Retrieval", category="s2p", modalities=["text"], - eval_splits=["train", "dev", "test"], + eval_splits=["test"], eval_langs=["eng-Latn"], main_score="ndcg_at_10", date=("2018-01-01", "2018-12-31"), # best guess: based on publication date From a6ce6f9b7050c1fad60e0c6e8985afa9356e2728 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 6 Dec 2024 11:18:13 +0000 Subject: [PATCH 59/76] 1.21.8 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml 
b/pyproject.toml index f978c01789..62c98a6814 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.7" +version = "1.21.8" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From fc64791943950f75ff58f522269f3329df341817 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Po=C5=9Bwiata?= Date: Sat, 7 Dec 2024 15:56:08 +0100 Subject: [PATCH 60/76] docs: Correction of SICK-R metadata (#1558) * Correction of SICK-R metadata * Correction of SICK-R metadata --------- Co-authored-by: rposwiata --- docs/create_tasks_table.py | 4 +-- docs/tasks.md | 2 +- mteb/tasks/STS/eng/SickrSTS.py | 48 ++++++++++++++++------------------ 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index 606f349cf2..4a1be0cd89 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -131,14 +131,14 @@ def insert_tables( file_path: str, tables: list[str], tags: list[str] = ["TASKS TABLE"] ) -> None: """Insert tables within and or similar tags.""" - md = Path(file_path).read_text() + md = Path(file_path).read_text(encoding="utf-8") for table, tag in zip(tables, tags): start = f"" end = f"" md = md.replace(md[md.index(start) + len(start) : md.index(end)], table) - Path(file_path).write_text(md) + Path(file_path).write_text(md, encoding="utf-8") def main(): diff --git a/docs/tasks.md b/docs/tasks.md index 0abcf2b8db..f32f90cf93 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -459,7 +459,7 @@ The following tables give you an overview of the tasks in MTEB. 
| [SICK-BR-PC](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | PairClassification | s2s | [Web, Written] | None | None | | [SICK-BR-STS](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | STS | s2s | [Web, Written] | None | None | | [SICK-E-PL](https://aclanthology.org/2020.lrec-1.207) | ['pol'] | PairClassification | s2s | | None | None | -| [SICK-R](https://aclanthology.org/2020.lrec-1.207) | ['eng'] | STS | s2s | | None | None | +| [SICK-R](https://aclanthology.org/L14-1314/) | ['eng'] | STS | s2s | [Web, Written] | None | None | | [SICK-R-PL](https://aclanthology.org/2020.lrec-1.207) | ['pol'] | STS | s2s | [Web, Written] | None | None | | [SICKFr](https://huggingface.co/datasets/Lajavaness/SICK-fr) | ['fra'] | STS | s2s | | None | None | | [SIQA](https://leaderboard.allenai.org/socialiqa/submissions/get-started) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | diff --git a/mteb/tasks/STS/eng/SickrSTS.py b/mteb/tasks/STS/eng/SickrSTS.py index 1d636688de..1c93fff578 100644 --- a/mteb/tasks/STS/eng/SickrSTS.py +++ b/mteb/tasks/STS/eng/SickrSTS.py @@ -12,8 +12,8 @@ class SickrSTS(AbsTaskSTS): "path": "mteb/sickr-sts", "revision": "20a6d6f312dd54037fe07a32d58e5e168867909d", }, - description="Semantic Textual Similarity SICK-R dataset as described here:", - reference="https://aclanthology.org/2020.lrec-1.207", + description="Semantic Textual Similarity SICK-R dataset", + reference="https://aclanthology.org/L14-1314/", type="STS", category="s2s", modalities=["text"], @@ -21,41 +21,37 @@ class SickrSTS(AbsTaskSTS): eval_langs=["eng-Latn"], main_score="cosine_spearman", date=None, - domains=None, - task_subtypes=None, - license=None, - annotations_creators=None, + domains=["Web", "Written"], + task_subtypes=["Textual Entailment"], + license="cc-by-nc-sa-3.0", + annotations_creators="human-annotated", dialect=None, sample_creation=None, - bibtex_citation="""@inproceedings{dadas-etal-2020-evaluation, - title = 
"Evaluation of Sentence Representations in {P}olish", - author = "Dadas, Slawomir and - Pere{\l}kiewicz, Micha{\l} and - Po{\'s}wiata, Rafa{\l}", + bibtex_citation="""@inproceedings{marelli-etal-2014-sick, + title = "A {SICK} cure for the evaluation of compositional distributional semantic models", + author = "Marelli, Marco and + Menini, Stefano and + Baroni, Marco and + Bentivogli, Luisa and + Bernardi, Raffaella and + Zamparelli, Roberto", editor = "Calzolari, Nicoletta and - B{\'e}chet, Fr{\'e}d{\'e}ric and - Blache, Philippe and Choukri, Khalid and - Cieri, Christopher and Declerck, Thierry and - Goggi, Sara and - Isahara, Hitoshi and + Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and - Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios", - booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference", + booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)", month = may, - year = "2020", - address = "Marseille, France", - publisher = "European Language Resources Association", - url = "https://aclanthology.org/2020.lrec-1.207", - pages = "1674--1680", - abstract = "Methods for learning sentence representations have been actively developed in recent years. However, the lack of pre-trained models and datasets annotated at the sentence level has been a problem for low-resource languages such as Polish which led to less interest in applying these methods to language-specific tasks. In this study, we introduce two new Polish datasets for evaluating sentence embeddings and provide a comprehensive evaluation of eight sentence representation methods including Polish and multilingual models. We consider classic word embedding models, recently developed contextual embeddings and multilingual sentence encoders, showing strengths and weaknesses of specific approaches. 
We also examine different methods of aggregating word vectors into a single sentence vector.", - language = "English", - ISBN = "979-10-95546-34-4", + year = "2014", + address = "Reykjavik, Iceland", + publisher = "European Language Resources Association (ELRA)", + url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/363_Paper.pdf", + pages = "216--223", + abstract = "Shared and internationally recognized benchmarks are fundamental for the development of any computational system. We aim to help the research community working on compositional distributional semantic models (CDSMs) by providing SICK (Sentences Involving Compositional Knowldedge), a large size English benchmark tailored for them. SICK consists of about 10,000 English sentence pairs that include many examples of the lexical, syntactic and semantic phenomena that CDSMs are expected to account for, but do not require dealing with other aspects of existing sentential data sets (idiomatic multiword expressions, named entities, telegraphic language) that are not within the scope of CDSMs. By means of crowdsourcing techniques, each pair was annotated for two crucial semantic tasks: relatedness in meaning (with a 5-point rating scale as gold score) and entailment relation between the two elements (with three possible gold labels: entailment, contradiction, and neutral). 
The SICK data set was used in SemEval-2014 Task 1, and it freely available for research purposes.", }""", ) From 611b6a175911d7a238f13439243e3c95652a2d85 Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Sat, 7 Dec 2024 21:08:14 +0100 Subject: [PATCH 61/76] feat(google_models): fix issues and add support for `text-embedding-005` and `text-multilingual-embedding-002` (#1562) * fix: google_models batching and prompt * feat: add text-embedding-005 and text-multilingual-embedding-002 * chore: `make lint` errors * fix: address PR comments --- mteb/models/google_models.py | 137 ++++++++++++++++++++++++++------ mteb/models/model2vec_models.py | 2 +- 2 files changed, 115 insertions(+), 24 deletions(-) diff --git a/mteb/models/google_models.py b/mteb/models/google_models.py index 688680abc4..4fcd21ae6e 100644 --- a/mteb/models/google_models.py +++ b/mteb/models/google_models.py @@ -4,12 +4,44 @@ from typing import Any import numpy as np +import tqdm from mteb.encoder_interface import Encoder, PromptType from mteb.model_meta import ModelMeta from .wrapper import Wrapper +MULTILINGUAL_EVALUATED_LANGUAGES = [ + "arb_Arab", + "ben_Beng", + "eng_Latn", + "spa_Latn", + "deu_Latn", + "pes_Arab", + "fin_Latn", + "fra_Latn", + "hin_Deva", + "ind_Latn", + "jpn_Jpan", + "kor_Hang", + "rus_Cyrl", + "swh_Latn", + "tel_Telu", + "tha_Thai", + "yor_Latn", + "zho_Hant", + "zho_Hans", +] + +MODEL_PROMPTS = { + "Classification": "CLASSIFICATION", + "MultilabelClassification": "CLASSIFICATION", + "Clustering": "CLUSTERING", + "STS": "SIMILARITY", + PromptType.query.value: "RETRIEVAL_QUERY", + PromptType.passage.value: "RETRIEVAL_DOCUMENT", +} + class GoogleTextEmbeddingModel(Encoder, Wrapper): def __init__( @@ -28,6 +60,7 @@ def _embed( self, texts: list[str], google_task_type: str | None = None, + show_progress_bar: bool = False, titles: list[str] | None = None, dimensionality: int | None = 768, ) -> list[list[float]]: @@ -54,14 +87,28 @@ def _embed( inputs = [ 
TextEmbeddingInput(text, task_type=google_task_type) for text in texts ] + kwargs = {"output_dimensionality": dimensionality} if dimensionality else {} - try: - embeddings = model.get_embeddings(inputs, **kwargs) - # Except the very rare google.api_core.exceptions.InternalServerError - except Exception as e: - print("Retrying once after error:", e) - embeddings = model.get_embeddings(inputs, **kwargs) - return np.asarray([embedding.values for embedding in embeddings]) + + max_batch_size = 16 ## Vertex API limits the number of instances per call to 250, but there is also a limit of tokens involved. Let's be conservative and set it to 16 by default. TODO: in a future PR, leverage the CountTokens API to get the optimum batch size for each request. + batches = [ + inputs[i : i + max_batch_size] + for i in range(0, len(inputs), max_batch_size) + ] + + all_embeddings = [] + + for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar): + try: + embeddings_batch = model.get_embeddings(batch, **kwargs) + # Except the very rare google.api_core.exceptions.InternalServerError + except Exception as e: + print("Retrying once after error:", e) + embeddings_batch = model.get_embeddings(batch, **kwargs) + + all_embeddings.extend([embedding.values for embedding in embeddings_batch]) + + return np.asarray(all_embeddings) def encode( self, @@ -70,31 +117,75 @@ def encode( prompt_type: PromptType | None = None, **kwargs: Any, ) -> np.ndarray: - google_task_type = self.get_prompt_name( - self.model_prompts, task_name, prompt_type + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + google_task_type = self.model_prompts.get(prompt_name) + + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + + return self._embed( + sentences, + google_task_type=google_task_type, + show_progress_bar=show_progress_bar, ) - return self._embed(sentences, google_task_type=google_task_type) -name = 
"text-embedding-004" -google_emb_004 = ModelMeta( +google_text_emb_004 = ModelMeta( loader=partial( GoogleTextEmbeddingModel, - model_name=name, - model_prompts={ - "Classification": "CLASSIFICATION", - "MultilabelClassification": "CLASSIFICATION", - "Clustering": "CLUSTERING", - "STS": "SIMILARITY", - PromptType.query.value: "RETRIEVAL_QUERY", - PromptType.passage.value: "RETRIEVAL_DOCUMENT", - }, + model_name="text-embedding-004", + model_prompts=MODEL_PROMPTS, ), - name=name, + name="google/text-embedding-004", languages=["eng-Latn"], open_weights=False, revision="1", # revision is intended for implementation - release_date=None, # couldnt figure this out + release_date="2024-05-14", + n_parameters=None, + memory_usage=None, + max_tokens=2048, + embed_dim=768, + license=None, + similarity_fn_name="cosine", # assumed + framework=["API"], + use_instructions=True, +) + +google_text_emb_005 = ModelMeta( + loader=partial( + GoogleTextEmbeddingModel, + model_name="text-embedding-005", + model_prompts=MODEL_PROMPTS, + ), + name="google/text-embedding-005", + languages=["eng-Latn"], + open_weights=False, + revision="1", # revision is intended for implementation + release_date="2024-11-18", + n_parameters=None, + memory_usage=None, + max_tokens=2048, + embed_dim=768, + license=None, + similarity_fn_name="cosine", # assumed + framework=["API"], + use_instructions=True, +) + +google_text_multilingual_emb_002 = ModelMeta( + loader=partial( + GoogleTextEmbeddingModel, + model_name="text-multilingual-embedding-002", + model_prompts=MODEL_PROMPTS, + ), + name="google/text-multilingual-embedding-002", + languages=MULTILINGUAL_EVALUATED_LANGUAGES, # From the list of evaluated languages in https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#supported_text_languages + open_weights=False, + revision="1", # revision is intended for implementation + release_date="2024-05-14", n_parameters=None, memory_usage=None, max_tokens=2048, diff --git 
a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py index 0dfdb193cd..e0109be125 100644 --- a/mteb/models/model2vec_models.py +++ b/mteb/models/model2vec_models.py @@ -7,7 +7,6 @@ import numpy as np -from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta from .wrapper import Wrapper @@ -25,6 +24,7 @@ def __init__( Args: model_name: The Model2Vec model to load from HuggingFace Hub. + **kwargs: Additional arguments to pass to the wrapper. """ try: from model2vec import StaticModel From 5e7e0339c73ece757dd5ad897d7d3e27c0473ff5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 7 Dec 2024 20:23:39 +0000 Subject: [PATCH 62/76] 1.22.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62c98a6814..d58aff46cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.21.8" +version = "1.22.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From ac44e58d0a94b9f571f0ca41e004af31dcef3b1b Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Sat, 7 Dec 2024 23:02:15 +0100 Subject: [PATCH 63/76] fix(bm25s): search implementation (#1566) fix: bm25s implementation --- mteb/evaluation/evaluators/RetrievalEvaluator.py | 6 +++--- mteb/models/bm25.py | 12 +++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 20a29b3ad5..8ec28c14ef 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -477,10 +477,10 @@ def __call__( if self.is_cross_encoder: return self.retriever.search_cross_encoder(corpus, queries, self.top_k) elif ( - hasattr(self.retriever.model, "mteb_model_meta") - and 
self.retriever.model.mteb_model_meta.name == "bm25s" + hasattr(self.retriever.model.model, "mteb_model_meta") + and self.retriever.model.model.mteb_model_meta.name == "bm25s" ): - return self.retriever.model.search( + return self.retriever.model.model.search( corpus, queries, self.top_k, diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index 1848b9e4e4..fdc86fb210 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ -58,7 +58,17 @@ def search( ) -> dict[str, dict[str, float]]: logger.info("Encoding Corpus...") corpus_ids = list(corpus.keys()) - corpus_with_ids = [{"doc_id": cid, **corpus[cid]} for cid in corpus_ids] + corpus_with_ids = [ + { + "doc_id": cid, + **( + {"text": corpus[cid]} + if isinstance(corpus[cid], str) + else corpus[cid] + ), + } + for cid in corpus_ids + ] corpus_texts = [ "\n".join([doc.get("title", ""), doc["text"]]) From b8ff89c2656a0437c3383fd64a8dfd265dbb1919 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 7 Dec 2024 22:18:04 +0000 Subject: [PATCH 64/76] 1.22.1 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d58aff46cb..c71b1e413a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.22.0" +version = "1.22.1" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 03347ebfe4809056e0fd2894fcae69dcdd2ed964 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sun, 8 Dec 2024 01:49:51 +0200 Subject: [PATCH 65/76] docs: Fix dependency library name for bm25s (#1568) * fix: bm25s implementation * correct library name --------- Co-authored-by: Daniel Buades Marcos --- mteb/models/bm25.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/models/bm25.py b/mteb/models/bm25.py index fdc86fb210..7d1161cdde 100644 --- a/mteb/models/bm25.py +++ b/mteb/models/bm25.py @@ 
-17,7 +17,7 @@ def bm25_loader(**kwargs): import Stemmer except ImportError: raise ImportError( - "bm25s or Stemmer is not installed. Please install it with `pip install bm25s Stemmer`." + "bm25s or Stemmer is not installed. Please install it with `pip install bm25s PyStemmer`." ) class BM25Search(DRESModel, Wrapper): From 6489fca1b47f60fd335e6ae644f89cb15fc5f943 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sat, 7 Dec 2024 18:15:23 -0800 Subject: [PATCH 66/76] fix: Add training dataset to model meta (#1561) * fix: Add training dataset to model meta Adresses #1556 * Added docs * format --- README.md | 22 ++++++++++++++++++++++ mteb/model_meta.py | 7 ++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3c659bbde5..d105a7aeb8 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,28 @@ results = mteb.load_results(models=models, tasks=tasks) df = results_to_dataframe(results) ``` + + + +
+ Annotate Contamination in the training data of a model + +### Annotate Contamination + +Have you found contamination in the training data of a model? Please let us know, either by opening an issue or, ideally, by submitting a PR +annotating the training datasets of the model: + +```py +model_w_contamination = ModelMeta( + name="model-with-contamination", + ... + training_datasets={"ArguAna": # name of dataset within MTEB + ["test"]}, # the splits that have been trained on + ... +) +``` + +
diff --git a/mteb/model_meta.py b/mteb/model_meta.py index 3f2cc2843f..2cfc6df297 100644 --- a/mteb/model_meta.py +++ b/mteb/model_meta.py @@ -73,8 +73,9 @@ class ModelMeta(BaseModel): in the Latin script. use_instructions: Whether the model uses instructions E.g. for prompt-based models. This also include models that require a specific format for input such as "query: {document}" or "passage: {document}". - zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models - are evaluated non-zero-shot unless specified otherwise. + training_datasets: A dictionary of datasets that the model was trained on. Names should be the names as they appear in `mteb`, for example + {"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to + a benchmark as well as mark dataset contaminations. adapted_from: Name of the model from which this model is adapted from. For quantizations, fine-tunes, long doc extensions, etc. superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1. 
""" @@ -98,7 +99,7 @@ class ModelMeta(BaseModel): reference: STR_URL | None = None similarity_fn_name: DISTANCE_METRICS | None = None use_instructions: bool | None = None - zero_shot_benchmarks: list[str] | None = None + training_datasets: dict[str, list[str]] | None = None adapted_from: str | None = None superseded_by: str | None = None From 1d21818c3704d1866245c21b0f186ac18fa77b9f Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Sun, 8 Dec 2024 03:17:41 +0100 Subject: [PATCH 67/76] feat: (cohere_models) cohere_task_type issue, batch requests and tqdm for visualization (#1564) * feat: batch requests to cohere models * fix: use correct task_type * feat: use tqdm with openai * fix: explicitely set `show_progress_bar` to False --- mteb/models/cohere_models.py | 68 ++++++++++++++++++++++++++---------- mteb/models/openai_models.py | 9 ++++- 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 2ed0b76a97..3f07a0d23b 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -5,6 +5,7 @@ import numpy as np import torch +import tqdm from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta @@ -140,25 +141,43 @@ def __init__( ) def _embed( - self, sentences: list[str], cohere_task_type: str, retries: int = 5 + self, + sentences: list[str], + cohere_task_type: str, + show_progress_bar: bool = False, + retries: int = 5, ) -> torch.Tensor: import cohere # type: ignore + max_batch_size = 256 + + batches = [ + sentences[i : i + max_batch_size] + for i in range(0, len(sentences), max_batch_size) + ] + client = cohere.Client() - while retries > 0: # Cohere's API is not always reliable - try: - response = client.embed( - texts=list(sentences), - model=self.model_name, - input_type=cohere_task_type, - ) - break - except Exception as e: - print(f"Retrying... 
{retries} retries left.") - retries -= 1 - if retries == 0: - raise e - return torch.tensor(response.embeddings) + + all_embeddings = [] + + for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar): + while retries > 0: # Cohere's API is not always reliable + try: + response = client.embed( + texts=batch, + model=self.model_name, + input_type=cohere_task_type, + ) + break + except Exception as e: + print(f"Retrying... {retries} retries left.") + retries -= 1 + if retries == 0: + raise e + + all_embeddings.extend(torch.tensor(response.embeddings).numpy()) + + return np.array(all_embeddings) def encode( self, @@ -168,13 +187,24 @@ def encode( prompt_type: PromptType | None = None, **kwargs: Any, ) -> np.ndarray: - cohere_task_type = self.get_prompt_name( - self.model_prompts, task_name, prompt_type - ) + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + cohere_task_type = self.model_prompts.get(prompt_name) + if cohere_task_type is None: # search_document is recommended if unknown (https://cohere.com/blog/introducing-embed-v3) cohere_task_type = "search_document" - return self._embed(sentences, cohere_task_type=cohere_task_type).numpy() + + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + + return self._embed( + sentences, + cohere_task_type=cohere_task_type, + show_progress_bar=show_progress_bar, + ) model_prompts = { diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py index ca2b32b2a2..adf96fbe4e 100644 --- a/mteb/models/openai_models.py +++ b/mteb/models/openai_models.py @@ -5,6 +5,7 @@ from typing import Any import numpy as np +import tqdm from mteb.model_meta import ModelMeta from mteb.requires_package import requires_package @@ -68,9 +69,15 @@ def encode(self, sentences: list[str], **kwargs: Any) -> np.ndarray: for i in range(0, len(trimmed_sentences), max_batch_size) ] + show_progress_bar = ( + False + if "show_progress_bar" 
not in kwargs + else kwargs.pop("show_progress_bar") + ) + all_embeddings = [] - for sublist in sublists: + for sublist in tqdm.tqdm(sublists, leave=False, disable=not show_progress_bar): try: response = self._client.embeddings.create( input=sublist, From 68bd8ac79b33e48942316b26f253db644b6763ad Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Sun, 8 Dec 2024 03:18:35 +0100 Subject: [PATCH 68/76] fix(publichealth-qa): ignore rows with `None` values in `question` or `answer` (#1565) --- mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py index c22d15afc4..6f7d188b7b 100644 --- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py @@ -43,6 +43,9 @@ def _load_publichealthqa_data( answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))} for row in data: + if row["question"] is None or row["answer"] is None: + # There are some questions and answers that are None in the original dataset, specifically in the Arabic subset. 
+ continue question = row["question"] answer = row["answer"] query_id = f"Q{question_ids[question]}" From 2550a27664a875f03d6fcf36f035c8cc93e07100 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 8 Dec 2024 09:26:07 +0000 Subject: [PATCH 69/76] 1.23.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c71b1e413a..5a69c61aec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.22.1" +version = "1.23.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From d4744510651a3e828e07bb3a67f1a0d6e37c84a7 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 8 Dec 2024 17:33:31 +0300 Subject: [PATCH 70/76] fix wongnai --- mteb/abstasks/TaskMetadata.py | 1 + ...eviewsClassification .py => WongnaiReviewsClassification.py} | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) rename mteb/tasks/Classification/tha/{WongnaiReviewsClassification .py => WongnaiReviewsClassification.py} (98%) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 65e7ddbb86..e3038b6348 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -168,6 +168,7 @@ "cc0-1.0", "bsd-3-clause", "gpl-3.0", + "lgpl-3.0", "cdla-sharing-1.0", "mpl-2.0", ] diff --git a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py similarity index 98% rename from mteb/tasks/Classification/tha/WongnaiReviewsClassification .py rename to mteb/tasks/Classification/tha/WongnaiReviewsClassification.py index 1a0bfb0834..4afd64dd21 100644 --- a/mteb/tasks/Classification/tha/WongnaiReviewsClassification .py +++ b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py @@ -23,7 +23,7 @@ class 
WongnaiReviewsClassification(AbsTaskClassification): dialect=[], domains=["Reviews", "Written"], task_subtypes=[], - license="LGPL-3.0", + license="lgpl-3.0", annotations_creators="derived", sample_creation="found", bibtex_citation=""" From 2015ee594dc776ac598900994111acff888dcd49 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:00:01 +0300 Subject: [PATCH 71/76] update inits --- mteb/__init__.py | 3 +- mteb/abstasks/__init__.py | 42 +- mteb/benchmarks/__init__.py | 57 +- mteb/evaluation/MTEB.py | 4 +- mteb/evaluation/__init__.py | 42 +- mteb/evaluation/evaluators/__init__.py | 42 +- mteb/models/arctic_models.py | 2 +- mteb/tasks/BitextMining/__init__.py | 71 +- mteb/tasks/BitextMining/dan/__init__.py | 5 + mteb/tasks/BitextMining/kat/__init__.py | 5 + .../BitextMining/multilingual/__init__.py | 41 + mteb/tasks/BitextMining/srn/__init__.py | 5 + mteb/tasks/BitextMining/vie/__init__.py | 5 + mteb/tasks/Classification/__init__.py | 681 +++++++-- mteb/tasks/Classification/ara/__init__.py | 21 + mteb/tasks/Classification/ben/__init__.py | 11 + mteb/tasks/Classification/bul/__init__.py | 7 + mteb/tasks/Classification/ces/__init__.py | 17 + mteb/tasks/Classification/dan/__init__.py | 15 + mteb/tasks/Classification/deu/__init__.py | 8 + mteb/tasks/Classification/ell/__init__.py | 5 + mteb/tasks/Classification/eng/__init__.py | 267 ++++ mteb/tasks/Classification/est/__init__.py | 5 + mteb/tasks/Classification/fas/__init__.py | 5 + mteb/tasks/Classification/fil/__init__.py | 6 + mteb/tasks/Classification/fin/__init__.py | 5 + mteb/tasks/Classification/fra/__init__.py | 6 + mteb/tasks/Classification/guj/__init__.py | 5 + mteb/tasks/Classification/heb/__init__.py | 5 + mteb/tasks/Classification/hin/__init__.py | 6 + mteb/tasks/Classification/hrv/__init__.py | 5 + mteb/tasks/Classification/ind/__init__.py | 11 + mteb/tasks/Classification/ita/__init__.py | 8 + mteb/tasks/Classification/jav/__init__.py | 5 + 
mteb/tasks/Classification/jpn/__init__.py | 5 + mteb/tasks/Classification/kan/__init__.py | 5 + mteb/tasks/Classification/kat/__init__.py | 5 + mteb/tasks/Classification/kor/__init__.py | 8 + mteb/tasks/Classification/kur/__init__.py | 5 + mteb/tasks/Classification/mal/__init__.py | 5 + mteb/tasks/Classification/mar/__init__.py | 5 + mteb/tasks/Classification/mkd/__init__.py | 7 + .../Classification/multilingual/__init__.py | 63 + mteb/tasks/Classification/mya/__init__.py | 5 + mteb/tasks/Classification/nep/__init__.py | 5 + mteb/tasks/Classification/nld/__init__.py | 7 + mteb/tasks/Classification/nob/__init__.py | 6 + mteb/tasks/Classification/ory/__init__.py | 5 + mteb/tasks/Classification/pan/__init__.py | 5 + mteb/tasks/Classification/pol/__init__.py | 17 + mteb/tasks/Classification/por/__init__.py | 5 + mteb/tasks/Classification/ron/__init__.py | 7 + mteb/tasks/Classification/rus/__init__.py | 19 + mteb/tasks/Classification/san/__init__.py | 5 + mteb/tasks/Classification/sin/__init__.py | 6 + mteb/tasks/Classification/slk/__init__.py | 8 + mteb/tasks/Classification/slv/__init__.py | 5 + mteb/tasks/Classification/spa/__init__.py | 6 + mteb/tasks/Classification/ssw/__init__.py | 5 + mteb/tasks/Classification/svk/__init__.py | 7 + mteb/tasks/Classification/swa/__init__.py | 5 + mteb/tasks/Classification/swe/__init__.py | 11 + mteb/tasks/Classification/tam/__init__.py | 5 + mteb/tasks/Classification/tel/__init__.py | 5 + mteb/tasks/Classification/tha/__init__.py | 6 + mteb/tasks/Classification/tsn/__init__.py | 5 + mteb/tasks/Classification/tur/__init__.py | 8 + mteb/tasks/Classification/ukr/__init__.py | 5 + mteb/tasks/Classification/urd/__init__.py | 5 + mteb/tasks/Classification/vie/__init__.py | 5 + mteb/tasks/Classification/zho/__init__.py | 21 + mteb/tasks/Classification/zul/__init__.py | 5 + mteb/tasks/Clustering/__init__.py | 220 ++- mteb/tasks/Clustering/deu/__init__.py | 17 + mteb/tasks/Clustering/eng/__init__.py | 57 + 
mteb/tasks/Clustering/fra/__init__.py | 14 + mteb/tasks/Clustering/jpn/__init__.py | 6 + .../tasks/Clustering/multilingual/__init__.py | 22 + mteb/tasks/Clustering/nob/__init__.py | 21 + mteb/tasks/Clustering/pol/__init__.py | 19 + mteb/tasks/Clustering/rom/__init__.py | 5 + mteb/tasks/Clustering/rus/__init__.py | 11 + mteb/tasks/Clustering/spa/__init__.py | 5 + mteb/tasks/Clustering/swe/__init__.py | 6 + mteb/tasks/Clustering/zho/__init__.py | 23 + mteb/tasks/InstructionReranking/__init__.py | 18 +- .../InstructionReranking/eng/__init__.py | 11 + .../multilingual/__init__.py | 5 + mteb/tasks/InstructionRetrieval/__init__.py | 4 +- .../InstructionRetrieval/eng/__init__.py | 5 + .../multilingual/__init__.py | 3 + .../MultiLabelClassification/__init__.py | 20 +- .../MultiLabelClassification/kor/__init__.py | 5 + .../MultiLabelClassification/mlt/__init__.py | 5 + .../multilingual/__init__.py | 5 + .../MultiLabelClassification/por/__init__.py | 5 + .../MultiLabelClassification/rus/__init__.py | 6 + mteb/tasks/PairClassification/__init__.py | 78 +- mteb/tasks/PairClassification/ara/__init__.py | 5 + mteb/tasks/PairClassification/ces/__init__.py | 5 + mteb/tasks/PairClassification/deu/__init__.py | 4 + mteb/tasks/PairClassification/eng/__init__.py | 13 + mteb/tasks/PairClassification/fas/__init__.py | 5 + mteb/tasks/PairClassification/hye/__init__.py | 5 + mteb/tasks/PairClassification/ind/__init__.py | 5 + mteb/tasks/PairClassification/kor/__init__.py | 5 + .../multilingual/__init__.py | 18 + mteb/tasks/PairClassification/pol/__init__.py | 5 + mteb/tasks/PairClassification/por/__init__.py | 6 + mteb/tasks/PairClassification/rus/__init__.py | 5 + mteb/tasks/PairClassification/zho/__init__.py | 5 + mteb/tasks/Reranking/__init__.py | 47 +- mteb/tasks/Reranking/eng/__init__.py | 17 + mteb/tasks/Reranking/fra/__init__.py | 6 + mteb/tasks/Reranking/jpn/__init__.py | 5 + mteb/tasks/Reranking/multilingual/__init__.py | 7 + mteb/tasks/Reranking/rus/__init__.py | 5 + 
mteb/tasks/Reranking/zho/__init__.py | 5 + mteb/tasks/Retrieval/__init__.py | 491 +++++-- mteb/tasks/Retrieval/ara/__init__.py | 5 + mteb/tasks/Retrieval/code/__init__.py | 29 + mteb/tasks/Retrieval/dan/__init__.py | 12 + mteb/tasks/Retrieval/deu/__init__.py | 17 + mteb/tasks/Retrieval/ell/__init__.py | 5 + mteb/tasks/Retrieval/eng/__init__.py | 142 ++ mteb/tasks/Retrieval/est/__init__.py | 5 + mteb/tasks/Retrieval/fra/__init__.py | 8 + mteb/tasks/Retrieval/hun/__init__.py | 5 + mteb/tasks/Retrieval/jpn/__init__.py | 17 + mteb/tasks/Retrieval/kat/__init__.py | 5 + mteb/tasks/Retrieval/kor/__init__.py | 6 + mteb/tasks/Retrieval/multilingual/__init__.py | 54 + mteb/tasks/Retrieval/nob/__init__.py | 6 + mteb/tasks/Retrieval/pol/__init__.py | 32 + mteb/tasks/Retrieval/rus/__init__.py | 6 + mteb/tasks/Retrieval/slk/__init__.py | 6 + mteb/tasks/Retrieval/spa/__init__.py | 6 + mteb/tasks/Retrieval/swe/__init__.py | 6 + mteb/tasks/Retrieval/tur/__init__.py | 5 + mteb/tasks/Retrieval/vie/__init__.py | 5 + mteb/tasks/Retrieval/zho/__init__.py | 25 + mteb/tasks/STS/__init__.py | 99 +- mteb/tasks/STS/deu/__init__.py | 5 + mteb/tasks/STS/eng/__init__.py | 21 + mteb/tasks/STS/fao/__init__.py | 5 + mteb/tasks/STS/fin/__init__.py | 5 + mteb/tasks/STS/fra/__init__.py | 5 + mteb/tasks/STS/jpn/__init__.py | 6 + mteb/tasks/STS/kor/__init__.py | 6 + mteb/tasks/STS/multilingual/__init__.py | 16 + mteb/tasks/STS/pol/__init__.py | 5 + mteb/tasks/STS/por/__init__.py | 6 + mteb/tasks/STS/ron/__init__.py | 5 + mteb/tasks/STS/rus/__init__.py | 6 + mteb/tasks/STS/spa/__init__.py | 5 + mteb/tasks/STS/zho/__init__.py | 5 + mteb/tasks/SpeedTask/__init__.py | 6 +- mteb/tasks/Summarization/__init__.py | 15 +- mteb/tasks/Summarization/eng/__init__.py | 5 + mteb/tasks/Summarization/fra/__init__.py | 5 + mteb/tasks/__init__.py | 1306 ++++++++++++++++- scripts/generate_imports.py | 106 ++ 162 files changed, 4586 insertions(+), 474 deletions(-) create mode 100644 
mteb/tasks/Classification/est/__init__.py create mode 100644 mteb/tasks/Classification/jpn/__init__.py create mode 100644 mteb/tasks/Classification/ory/__init__.py create mode 100644 mteb/tasks/Classification/pan/__init__.py create mode 100644 mteb/tasks/Classification/slk/__init__.py create mode 100644 mteb/tasks/Clustering/rom/__init__.py create mode 100644 mteb/tasks/InstructionRetrieval/multilingual/__init__.py create mode 100644 mteb/tasks/PairClassification/ara/__init__.py create mode 100644 mteb/tasks/PairClassification/ces/__init__.py create mode 100644 mteb/tasks/PairClassification/fas/__init__.py create mode 100644 mteb/tasks/PairClassification/ind/__init__.py create mode 100644 mteb/tasks/PairClassification/kor/__init__.py create mode 100644 mteb/tasks/PairClassification/por/__init__.py create mode 100644 mteb/tasks/Reranking/jpn/__init__.py create mode 100644 mteb/tasks/Retrieval/hun/__init__.py create mode 100644 mteb/tasks/Retrieval/jpn/__init__.py create mode 100644 mteb/tasks/STS/fao/__init__.py create mode 100644 mteb/tasks/STS/jpn/__init__.py create mode 100644 mteb/tasks/STS/por/__init__.py create mode 100644 mteb/tasks/STS/ron/__init__.py create mode 100644 mteb/tasks/STS/rus/__init__.py create mode 100644 scripts/generate_imports.py diff --git a/mteb/__init__.py b/mteb/__init__.py index 6de017b1f1..0953699ff9 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -10,7 +10,7 @@ MTEB_RETRIEVAL_WITH_INSTRUCTIONS, CoIR, ) -from mteb.evaluation import * +from mteb.evaluation import MTEB from mteb.load_results import BenchmarkResults, load_results from mteb.models import get_model, get_model_meta, get_model_metas from mteb.overview import TASKS_REGISTRY, get_task, get_tasks @@ -40,4 +40,5 @@ "get_benchmarks", "BenchmarkResults", "BENCHMARK_REGISTRY", + "MTEB", ] diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index 086866b997..83c2a6d1df 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -1,15 +1,31 @@ from 
__future__ import annotations -from ..evaluation.LangMapping import * -from .AbsTask import * -from .AbsTaskBitextMining import * -from .AbsTaskClassification import * -from .AbsTaskClustering import * -from .AbsTaskMultilabelClassification import * -from .AbsTaskPairClassification import * -from .AbsTaskReranking import * -from .AbsTaskRetrieval import * -from .AbsTaskSpeedTask import * -from .AbsTaskSTS import * -from .AbsTaskSummarization import * -from .MultilingualTask import * +from .AbsTask import AbsTask +from .AbsTaskBitextMining import AbsTaskBitextMining +from .AbsTaskClassification import AbsTaskClassification +from .AbsTaskClustering import AbsTaskClustering +from .AbsTaskClusteringFast import AbsTaskClusteringFast +from .AbsTaskMultilabelClassification import AbsTaskMultilabelClassification +from .AbsTaskPairClassification import AbsTaskPairClassification +from .AbsTaskReranking import AbsTaskReranking +from .AbsTaskRetrieval import AbsTaskRetrieval +from .AbsTaskSpeedTask import AbsTaskSpeedTask +from .AbsTaskSTS import AbsTaskSTS +from .AbsTaskSummarization import AbsTaskSummarization +from .MultilingualTask import MultilingualTask + +__all__ = [ + "AbsTask", + "AbsTaskBitextMining", + "AbsTaskClassification", + "AbsTaskClustering", + "AbsTaskClusteringFast", + "AbsTaskMultilabelClassification", + "AbsTaskPairClassification", + "AbsTaskReranking", + "AbsTaskRetrieval", + "AbsTaskSpeedTask", + "AbsTaskSTS", + "AbsTaskSummarization", + "MultilingualTask", +] diff --git a/mteb/benchmarks/__init__.py b/mteb/benchmarks/__init__.py index 653b97c6f7..b44a52ed37 100644 --- a/mteb/benchmarks/__init__.py +++ b/mteb/benchmarks/__init__.py @@ -1,4 +1,57 @@ from __future__ import annotations -from mteb.benchmarks.benchmarks import * -from mteb.benchmarks.get_benchmark import * +from mteb.benchmarks.benchmarks import ( + BRIGHT, + LONG_EMBED, + MTEB_DEU, + MTEB_EN, + MTEB_ENG_CLASSIC, + MTEB_EU, + MTEB_FRA, + MTEB_INDIC, + MTEB_JPN, + MTEB_KOR, + MTEB_MAIN_RU, + 
MTEB_MINERS_BITEXT_MINING, + MTEB_POL, + MTEB_RETRIEVAL_LAW, + MTEB_RETRIEVAL_MEDICAL, + MTEB_RETRIEVAL_WITH_INSTRUCTIONS, + SEB, + Benchmark, + CoIR, + MTEB_code, + MTEB_multilingual, +) +from mteb.benchmarks.get_benchmark import ( + BENCHMARK_REGISTRY, + get_benchmark, + get_benchmarks, +) + +__all__ = [ + "Benchmark", + "MTEB_EN", + "MTEB_ENG_CLASSIC", + "MTEB_MAIN_RU", + "MTEB_RETRIEVAL_WITH_INSTRUCTIONS", + "MTEB_RETRIEVAL_LAW", + "MTEB_RETRIEVAL_MEDICAL", + "MTEB_MINERS_BITEXT_MINING", + "SEB", + "CoIR", + "MTEB_FRA", + "MTEB_DEU", + "MTEB_KOR", + "MTEB_POL", + "MTEB_code", + "MTEB_multilingual", + "MTEB_JPN", + "MTEB_INDIC", + "MTEB_EU", + "LONG_EMBED", + "BRIGHT", + "BENCHMARK_REGISTRY", + "get_benchmarks", + "get_benchmark", +] diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index 261eb97ac7..dc8853dd8e 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -13,19 +13,17 @@ from typing import Any import datasets -from sentence_transformers import CrossEncoder, SentenceTransformer from codecarbon import EmissionsTracker +from sentence_transformers import CrossEncoder, SentenceTransformer from mteb.abstasks.AbsTask import ScoresDict from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models import model_meta_from_sentence_transformers -from ..abstasks import * from ..abstasks import AbsTask, AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper -from ..tasks import * from . 
import LangMapping logger = logging.getLogger(__name__) diff --git a/mteb/evaluation/__init__.py b/mteb/evaluation/__init__.py index c0a1596c91..58db80480c 100644 --- a/mteb/evaluation/__init__.py +++ b/mteb/evaluation/__init__.py @@ -1,3 +1,43 @@ from __future__ import annotations -from .MTEB import * +from .evaluators import ( + BitextMiningEvaluator, + ClassificationEvaluator, + ClusteringEvaluator, + DenseRetrievalExactSearch, + DeprecatedSummarizationEvaluator, + DRESModel, + Evaluator, + PairClassificationEvaluator, + RetrievalEvaluator, + STSEvaluator, + SummarizationEvaluator, + corpus_to_str, + dot_distance, + kNNClassificationEvaluator, + kNNClassificationEvaluatorPytorch, + logRegClassificationEvaluator, +) +from .LangMapping import LANG_MAPPING +from .MTEB import MTEB + +__all__ = [ + "Evaluator", + "STSEvaluator", + "SummarizationEvaluator", + "DeprecatedSummarizationEvaluator", + "RetrievalEvaluator", + "DRESModel", + "DenseRetrievalExactSearch", + "ClusteringEvaluator", + "BitextMiningEvaluator", + "PairClassificationEvaluator", + "corpus_to_str", + "kNNClassificationEvaluator", + "kNNClassificationEvaluatorPytorch", + "logRegClassificationEvaluator", + "dot_distance", + "LANG_MAPPING", + "MTEB", + "ClassificationEvaluator", +] diff --git a/mteb/evaluation/evaluators/__init__.py b/mteb/evaluation/evaluators/__init__.py index fc293a3448..ac2a886067 100644 --- a/mteb/evaluation/evaluators/__init__.py +++ b/mteb/evaluation/evaluators/__init__.py @@ -1,9 +1,37 @@ from __future__ import annotations -from .BitextMiningEvaluator import * -from .ClassificationEvaluator import * -from .ClusteringEvaluator import * -from .PairClassificationEvaluator import * -from .RetrievalEvaluator import * -from .STSEvaluator import * -from .SummarizationEvaluator import * +from .BitextMiningEvaluator import BitextMiningEvaluator +from .ClassificationEvaluator import ( + dot_distance, + kNNClassificationEvaluator, + kNNClassificationEvaluatorPytorch, + 
logRegClassificationEvaluator, +) +from .ClusteringEvaluator import ClusteringEvaluator +from .Evaluator import Evaluator +from .model_classes import DenseRetrievalExactSearch, DRESModel, corpus_to_str +from .PairClassificationEvaluator import PairClassificationEvaluator +from .RetrievalEvaluator import RetrievalEvaluator +from .STSEvaluator import STSEvaluator +from .SummarizationEvaluator import ( + DeprecatedSummarizationEvaluator, + SummarizationEvaluator, +) + +__all__ = [ + "Evaluator", + "STSEvaluator", + "SummarizationEvaluator", + "DeprecatedSummarizationEvaluator", + "RetrievalEvaluator", + "DRESModel", + "DenseRetrievalExactSearch", + "ClusteringEvaluator", + "BitextMiningEvaluator", + "PairClassificationEvaluator", + "corpus_to_str", + "kNNClassificationEvaluator", + "kNNClassificationEvaluatorPytorch", + "logRegClassificationEvaluator", + "dot_distance", +] diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index 064f367592..ce1db29bbd 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -162,4 +162,4 @@ use_instructions=False, adapted_from="intfloat/e5-base-unsupervised", superseded_by=None, -) \ No newline at end of file +) diff --git a/mteb/tasks/BitextMining/__init__.py b/mteb/tasks/BitextMining/__init__.py index c176077215..f43f53a49a 100644 --- a/mteb/tasks/BitextMining/__init__.py +++ b/mteb/tasks/BitextMining/__init__.py @@ -1,24 +1,51 @@ from __future__ import annotations -from .dan.BornholmskBitextMining import * -from .kat.TbilisiCityHallBitextMining import * -from .multilingual.BibleNLPBitextMining import * -from .multilingual.BUCCBitextMining import * -from .multilingual.BUCCBitextMiningFast import * -from .multilingual.DiaBLaBitextMining import * -from .multilingual.FloresBitextMining import * -from .multilingual.IN22ConvBitextMining import * -from .multilingual.IN22GenBitextMining import * -from .multilingual.IndicGenBenchFloresBitextMining import * -from 
.multilingual.IWSLT2017BitextMining import * -from .multilingual.LinceMTBitextMining import * -from .multilingual.NollySentiBitextMining import * -from .multilingual.NorwegianCourtsBitextMining import * -from .multilingual.NTREXBitextMining import * -from .multilingual.NusaTranslationBitextMining import * -from .multilingual.NusaXBitextMining import * -from .multilingual.PhincBitextMining import * -from .multilingual.RomaTalesBitextMining import * -from .multilingual.TatoebaBitextMining import * -from .srn.SRNCorpusBitextMining import * -from .vie.VieMedEVBitextMining import * +from .dan import BornholmBitextMining +from .kat import TbilisiCityHallBitextMining +from .multilingual import ( + BibleNLPBitextMining, + BUCCBitextMining, + BUCCBitextMiningFast, + DiaBLaBitextMining, + FloresBitextMining, + IN22ConvBitextMining, + IN22GenBitextMining, + IndicGenBenchFloresBitextMining, + IWSLT2017BitextMining, + LinceMTBitextMining, + NollySentiBitextMining, + NorwegianCourtsBitextMining, + NTREXBitextMining, + NusaTranslationBitextMining, + NusaXBitextMining, + PhincBitextMining, + RomaTalesBitextMining, + TatoebaBitextMining, +) +from .srn import SRNCorpusBitextMining +from .vie import VieMedEVBitextMining + +__all__ = [ + "TbilisiCityHallBitextMining", + "VieMedEVBitextMining", + "BornholmBitextMining", + "SRNCorpusBitextMining", + "IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + "DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", +] diff --git a/mteb/tasks/BitextMining/dan/__init__.py b/mteb/tasks/BitextMining/dan/__init__.py index e69de29bb2..00f3bbf4aa 100644 --- 
a/mteb/tasks/BitextMining/dan/__init__.py +++ b/mteb/tasks/BitextMining/dan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .BornholmskBitextMining import BornholmBitextMining + +__all__ = ["BornholmBitextMining"] diff --git a/mteb/tasks/BitextMining/kat/__init__.py b/mteb/tasks/BitextMining/kat/__init__.py index e69de29bb2..808630021d 100644 --- a/mteb/tasks/BitextMining/kat/__init__.py +++ b/mteb/tasks/BitextMining/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TbilisiCityHallBitextMining import TbilisiCityHallBitextMining + +__all__ = ["TbilisiCityHallBitextMining"] diff --git a/mteb/tasks/BitextMining/multilingual/__init__.py b/mteb/tasks/BitextMining/multilingual/__init__.py index e69de29bb2..e7c6ca70f6 100644 --- a/mteb/tasks/BitextMining/multilingual/__init__.py +++ b/mteb/tasks/BitextMining/multilingual/__init__.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from .BibleNLPBitextMining import BibleNLPBitextMining +from .BUCCBitextMining import BUCCBitextMining +from .BUCCBitextMiningFast import BUCCBitextMiningFast +from .DiaBLaBitextMining import DiaBLaBitextMining +from .FloresBitextMining import FloresBitextMining +from .IN22ConvBitextMining import IN22ConvBitextMining +from .IN22GenBitextMining import IN22GenBitextMining +from .IndicGenBenchFloresBitextMining import IndicGenBenchFloresBitextMining +from .IWSLT2017BitextMining import IWSLT2017BitextMining +from .LinceMTBitextMining import LinceMTBitextMining +from .NollySentiBitextMining import NollySentiBitextMining +from .NorwegianCourtsBitextMining import NorwegianCourtsBitextMining +from .NTREXBitextMining import NTREXBitextMining +from .NusaTranslationBitextMining import NusaTranslationBitextMining +from .NusaXBitextMining import NusaXBitextMining +from .PhincBitextMining import PhincBitextMining +from .RomaTalesBitextMining import RomaTalesBitextMining +from .TatoebaBitextMining import TatoebaBitextMining + +__all__ = [ + 
"IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + "DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", +] diff --git a/mteb/tasks/BitextMining/srn/__init__.py b/mteb/tasks/BitextMining/srn/__init__.py index e69de29bb2..b3d0401a96 100644 --- a/mteb/tasks/BitextMining/srn/__init__.py +++ b/mteb/tasks/BitextMining/srn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SRNCorpusBitextMining import SRNCorpusBitextMining + +__all__ = ["SRNCorpusBitextMining"] diff --git a/mteb/tasks/BitextMining/vie/__init__.py b/mteb/tasks/BitextMining/vie/__init__.py index e69de29bb2..2ca8d5290d 100644 --- a/mteb/tasks/BitextMining/vie/__init__.py +++ b/mteb/tasks/BitextMining/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieMedEVBitextMining import VieMedEVBitextMining + +__all__ = ["VieMedEVBitextMining"] diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py index 3e80ae2181..ca9f8bd64d 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -1,144 +1,543 @@ from __future__ import annotations -from .ara.AJGT import * -from .ara.HotelReviewSentimentClassification import * -from .ara.OnlineStoreReviewSentimentClassification import * -from .ara.RestaurantReviewSentimentClassification import * -from .ara.TweetEmotionClassification import * -from .ara.TweetSarcasmClassification import * -from .ben.BengaliDocumentClassification import * -from .ben.BengaliHateSpeechClassification import * -from .ben.BengaliSentimentAnalysis import * -from .bul.BulgarianStoreReviewSentimentClassfication import * 
-from .ces.CSFDCZMovieReviewSentimentClassification import * -from .ces.CzechProductReviewSentimentClassification import * -from .ces.CzechSoMeSentimentClassification import * -from .ces.CzechSubjectivityClassification import * -from .dan.AngryTweetsClassification import * -from .dan.DanishPoliticalCommentsClassification import * -from .dan.DKHateClassification import * -from .dan.LccSentimentClassification import * -from .deu.GermanPoliticiansTwitterSentimentClassification import * -from .deu.TenKGnadClassification import * -from .ell.GreekLegalCodeClassification import * -from .eng.AmazonPolarityClassification import * -from .eng.ArxivClassification import * -from .eng.Banking77Classification import * -from .eng.DBpediaClassification import * -from .eng.EmotionClassification import * -from .eng.FinancialPhrasebankClassification import * -from .eng.FrenkEnClassification import * -from .eng.ImdbClassification import * -from .eng.LegalBenchClassification import * -from .eng.NewsClassification import * -from .eng.PatentClassification import * -from .eng.PoemSentimentClassification import * -from .eng.ToxicChatClassification import * -from .eng.ToxicConversationsClassification import * -from .eng.TweetSentimentExtractionClassification import * -from .eng.TweetTopicSingleClassification import * -from .eng.YahooAnswersTopicsClassification import * -from .eng.YelpReviewFullClassification import * -from .est.estonian_valence import * -from .fas.PersianFoodSentimentClassification import * -from .fil.FilipinoHateSpeechClassification import * -from .fil.FilipinoShopeeReviewsClassification import * -from .fin.FinToxicityClassification import * -from .fra.FrenchBookReviews import * -from .fra.MovieReviewSentimentClassification import * -from .guj.GujaratiNewsClassification import * -from .heb.HebrewSentimentAnalysis import * -from .hin.HindiDiscourseClassification import * -from .hin.SentimentAnalysisHindi import * -from .hrv.FrenkHrClassification import * -from 
.ind.IndonesianIdClickbaitClassification import * -from .ind.IndonesianMongabayConservationClassification import * -from .ita.ItaCaseholdClassification import * -from .ita.ItalianLinguistAcceptabilityClassification import * -from .jav.JavaneseIMDBClassification import * -from .jpn.WRIMEClassification import * -from .kan.KannadaNewsClassification import * -from .kor.KlueTC import * -from .kor.KorFin import * -from .kor.KorHateClassification import * -from .kor.KorSarcasmClassification import * -from .kur.KurdishSentimentClassification import * -from .mal.MalayalamNewsClassification import * -from .mar.MarathiNewsClassification import * -from .mkd.MacedonianTweetSentimentClassification import * -from .multilingual.AfriSentiClassification import * -from .multilingual.AfriSentiLangClassification import * -from .multilingual.AmazonCounterfactualClassification import * -from .multilingual.AmazonReviewsClassification import * -from .multilingual.CataloniaTweetClassification import * -from .multilingual.CyrillicTurkicLangClassification import * -from .multilingual.HinDialectClassification import * -from .multilingual.IndicLangClassification import * -from .multilingual.IndicNLPNewsClassification import * -from .multilingual.IndicSentimentClassification import * -from .multilingual.LanguageClassification import * -from .multilingual.MasakhaNEWSClassification import * -from .multilingual.MassiveIntentClassification import * -from .multilingual.MassiveScenarioClassification import * -from .multilingual.MTOPDomainClassification import * -from .multilingual.MTOPIntentClassification import * -from .multilingual.MultiHateClassification import * -from .multilingual.MultilingualSentimentClassification import * -from .multilingual.NaijaSenti import * -from .multilingual.NordicLangClassification import * -from .multilingual.NusaParagraphEmotionClassification import * -from .multilingual.NusaParagraphTopicClassification import * -from .multilingual.NusaXSenti import * -from 
.multilingual.ScalaClassification import * -from .multilingual.SIB200Classification import * -from .multilingual.SouthAfricanLangClassification import * -from .multilingual.SwissJudgementClassification import * -from .multilingual.TurkicClassification import * -from .multilingual.TweetSentimentClassification import * -from .mya.MyanmarNews import * -from .nep.NepaliNewsClassification import * -from .nld.DutchBookReviewSentimentClassification import * -from .nob.NoRecClassification import * -from .nob.NorwegianParliamentClassification import * -from .ory.OdiaNewsClassification import * -from .pan.PunjabiNewsClassification import * -from .pol.PolishClassification import * -from .por.HateSpeechPortugueseClassification import * -from .ron.Moroco import * -from .ron.RomanianReviewsSentiment import * -from .ron.RomanianSentimentClassification import * -from .rus.GeoreviewClassification import * -from .rus.HeadlineClassification import * -from .rus.InappropriatenessClassification import * -from .rus.KinopoiskClassification import * -from .rus.RuReviewsClassification import * -from .rus.RuSciBenchGRNTIClassification import * -from .rus.RuSciBenchOECDClassification import * -from .san.SanskritShlokasClassification import * -from .sin.SinhalaNewsClassification import * -from .sin.SinhalaNewsSourceClassification import * -from .slk.CSFDSKMovieReviewSentimentClassification import * -from .slk.SlovakHateSpeechClassification import * -from .slv.FrenkSlClassification import * -from .spa.SpanishNewsClassification import * -from .spa.SpanishSentimentClassification import * -from .ssw.SiswatiNewsClassification import * -from .svk.SlovakMovieReviewSentimentClassification import * -from .swa.SwahiliNewsClassification import * -from .swe.DalajClassification import * -from .swe.SwedishSentimentClassification import * -from .swe.SweRecClassification import * -from .tam.TamilNewsClassification import * -from .tel.TeluguAndhraJyotiNewsClassification import * -from 
.tha.WisesightSentimentClassification import * -from .tsn.TswanaNewsClassification import * -from .tur.TurkishMovieSentimentClassification import * -from .tur.TurkishProductSentimentClassification import * -from .ukr.UkrFormalityClassification import * -from .urd.UrduRomanSentimentClassification import * -from .vie.VieStudentFeedbackClassification import * -from .zho.CMTEBClassification import * -from .zho.YueOpenriceReviewClassification import ( - YueOpenriceReviewClassification, # noqa: F401 +from .ara import ( + AJGT, + HotelReviewSentimentClassification, + OnlineStoreReviewSentimentClassification, + RestaurantReviewSentimentClassification, + TweetEmotionClassification, + TweetSarcasmClassification, ) -from .zul.IsiZuluNewsClassification import * +from .ben import ( + BengaliDocumentClassification, + BengaliHateSpeechClassification, + BengaliSentimentAnalysis, +) +from .bul import BulgarianStoreReviewSentimentClassfication +from .ces import ( + CSFDCZMovieReviewSentimentClassification, + CzechProductReviewSentimentClassification, + CzechSoMeSentimentClassification, + CzechSubjectivityClassification, +) +from .dan import ( + AngryTweetsClassification, + DanishPoliticalCommentsClassification, + DdiscoCohesionClassification, + DKHateClassification, + LccSentimentClassification, +) +from .deu import GermanPoliticiansTwitterSentimentClassification, TenKGnadClassification +from .ell import GreekLegalCodeClassification +from .eng import ( + AmazonPolarityClassification, + ArxivClassification, + Banking77Classification, + CanadaTaxCourtOutcomesLegalBenchClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + 
ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + 
CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + DBpediaClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + Diversity6LegalBenchClassification, + EmotionClassification, + FinancialPhrasebankClassification, + FrenkEnClassification, + FunctionOfDecisionSectionLegalBenchClassification, + ImdbClassification, + InsurancePolicyInterpretationLegalBenchClassification, + InternationalCitizenshipQuestionsLegalBenchClassification, + JCrewBlockerLegalBenchClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MAUDLegalBenchClassification, + NewsClassification, + 
NYSJudicialEthicsLegalBenchClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PatentClassification, + PersonalJurisdictionLegalBenchClassification, + PoemSentimentClassification, + PROALegalBenchClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + TelemarketingSalesRuleLegalBenchClassification, + TextualismToolDictionariesLegalBenchClassification, + TextualismToolPlainLegalBenchClassification, + ToxicChatClassification, + ToxicConversationsClassification, + TweetSentimentExtractionClassification, + TweetTopicSingleClassification, + UCCVCommonLawLegalBenchClassification, + UnfairTOSLegalBenchClassification, + YahooAnswersTopicsClassification, + YelpReviewFullClassification, +) +from .est import EstonianValenceClassification +from .fas import PersianFoodSentimentClassification +from .fil import FilipinoHateSpeechClassification, FilipinoShopeeReviewsClassification +from .fin import FinToxicityClassification +from .fra import FrenchBookReviews, MovieReviewSentimentClassification +from .guj import GujaratiNewsClassification +from .heb import 
HebrewSentimentAnalysis +from .hin import HindiDiscourseClassification, SentimentAnalysisHindi +from .hrv import FrenkHrClassification +from .ind import ( + IndonesianIdClickbaitClassification, + IndonesianMongabayConservationClassification, +) +from .ita import ItaCaseholdClassification, ItalianLinguisticAcceptabilityClassification +from .jav import JavaneseIMDBClassification +from .jpn import WRIMEClassification +from .kan import KannadaNewsClassification +from .kat import GeorgianSentimentClassification +from .kor import KlueTC, KorFin, KorHateClassification, KorSarcasmClassification +from .kur import KurdishSentimentClassification +from .mal import MalayalamNewsClassification +from .mar import MarathiNewsClassification +from .mkd import MacedonianTweetSentimentClassification +from .multilingual import ( + AfriSentiClassification, + AfriSentiLangClassification, + AmazonCounterfactualClassification, + AmazonReviewsClassification, + CataloniaTweetClassification, + CyrillicTurkicLangClassification, + HinDialectClassification, + IndicLangClassification, + IndicNLPNewsClassification, + IndicSentimentClassification, + LanguageClassification, + MasakhaNEWSClassification, + MassiveIntentClassification, + MassiveScenarioClassification, + MTOPDomainClassification, + MTOPIntentClassification, + MultiHateClassification, + MultilingualSentimentClassification, + NaijaSenti, + NordicLangClassification, + NusaParagraphEmotionClassification, + NusaParagraphTopicClassification, + NusaXSentiClassification, + ScalaClassification, + SIB200Classification, + SouthAfricanLangClassification, + SwissJudgementClassification, + TurkicClassification, + TweetSentimentClassification, +) +from .mya import MyanmarNews +from .nep import NepaliNewsClassification +from .nld import DutchBookReviewSentimentClassification +from .nob import NoRecClassification, NorwegianParliamentClassification +from .ory import OdiaNewsClassification +from .pan import PunjabiNewsClassification +from .pol import ( + 
AllegroReviewsClassification, + CbdClassification, + PacClassification, + PolEmo2InClassification, + PolEmo2OutClassification, +) +from .por import HateSpeechPortugueseClassification +from .ron import Moroco, RomanianReviewsSentiment, RomanianSentimentClassification +from .rus import ( + GeoreviewClassification, + HeadlineClassification, + InappropriatenessClassification, + KinopoiskClassification, + RuReviewsClassification, + RuSciBenchGRNTIClassification, + RuSciBenchOECDClassification, +) +from .san import SanskritShlokasClassification +from .sin import SinhalaNewsClassification, SinhalaNewsSourceClassification +from .slk import ( + CSFDSKMovieReviewSentimentClassification, + SlovakHateSpeechClassification, +) +from .slv import FrenkSlClassification +from .spa import SpanishNewsClassification, SpanishSentimentClassification +from .ssw import SiswatiNewsClassification +from .svk import SlovakMovieReviewSentimentClassification +from .swa import SwahiliNewsClassification +from .swe import ( + DalajClassification, + SwedishSentimentClassification, + SweRecClassification, +) +from .tam import TamilNewsClassification +from .tel import TeluguAndhraJyotiNewsClassification +from .tha import WisesightSentimentClassification, WongnaiReviewsClassification +from .tsn import TswanaNewsClassification +from .tur import ( + TurkishMovieSentimentClassification, + TurkishProductSentimentClassification, +) +from .ukr import UkrFormalityClassification +from .urd import UrduRomanSentimentClassification +from .vie import VieStudentFeedbackClassification +from .zho import ( + IFlyTek, + JDReview, + MultilingualSentiment, + OnlineShopping, + TNews, + Waimai, + YueOpenriceReviewClassification, +) +from .zul import IsiZuluNewsClassification + +__all__ = [ + "TeluguAndhraJyotiNewsClassification", + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", + "HateSpeechPortugueseClassification", + 
"SpanishNewsClassification", + "SpanishSentimentClassification", + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", + "BulgarianStoreReviewSentimentClassfication", + "KurdishSentimentClassification", + "ItaCaseholdClassification", + "ItalianLinguisticAcceptabilityClassification", + "GeorgianSentimentClassification", + "DalajClassification", + "SweRecClassification", + "SwedishSentimentClassification", + "CSFDSKMovieReviewSentimentClassification", + "SlovakHateSpeechClassification", + "NorwegianParliamentClassification", + "NoRecClassification", + "FilipinoHateSpeechClassification", + "FilipinoShopeeReviewsClassification", + "MarathiNewsClassification", + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", + "UrduRomanSentimentClassification", + "MacedonianTweetSentimentClassification", + "FrenkSlClassification", + "SwahiliNewsClassification", + "FinToxicityClassification", + "KannadaNewsClassification", + "TenKGnadClassification", + "GermanPoliticiansTwitterSentimentClassification", + "PunjabiNewsClassification", + "TswanaNewsClassification", + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + "HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", + "NepaliNewsClassification", + "VieStudentFeedbackClassification", + "DutchBookReviewSentimentClassification", + "SiswatiNewsClassification", + "UkrFormalityClassification", + "SanskritShlokasClassification", + "SlovakMovieReviewSentimentClassification", + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", + "TamilNewsClassification", + "CSFDCZMovieReviewSentimentClassification", + 
"CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", + "EstonianValenceClassification", + "MyanmarNews", + "JavaneseIMDBClassification", + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + "CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + "CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + "CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + "CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + 
"CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + "ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + "Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + "JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + 
"LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + "LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + "OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + "PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + "SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + 
"TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", + "PersianFoodSentimentClassification", + "HebrewSentimentAnalysis", + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", + "SinhalaNewsSourceClassification", + "SinhalaNewsClassification", + "WisesightSentimentClassification", + "WongnaiReviewsClassification", + "WRIMEClassification", + "RomanianReviewsSentiment", + "Moroco", + "RomanianSentimentClassification", + "KorSarcasmClassification", + "KorHateClassification", + "KorFin", + "KlueTC", + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + "MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + 
"AmazonCounterfactualClassification", + "MasakhaNEWSClassification", + "GujaratiNewsClassification", + "IsiZuluNewsClassification", + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", + "OdiaNewsClassification", + "GreekLegalCodeClassification", + "MovieReviewSentimentClassification", + "FrenchBookReviews", + "FrenkHrClassification", + "HindiDiscourseClassification", + "SentimentAnalysisHindi", + "MalayalamNewsClassification", +] diff --git a/mteb/tasks/Classification/ara/__init__.py b/mteb/tasks/Classification/ara/__init__.py index e69de29bb2..f23b02d135 100644 --- a/mteb/tasks/Classification/ara/__init__.py +++ b/mteb/tasks/Classification/ara/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .AJGT import AJGT +from .HotelReviewSentimentClassification import HotelReviewSentimentClassification +from .OnlineStoreReviewSentimentClassification import ( + OnlineStoreReviewSentimentClassification, +) +from .RestaurantReviewSentimentClassification import ( + RestaurantReviewSentimentClassification, +) +from .TweetEmotionClassification import TweetEmotionClassification +from .TweetSarcasmClassification import TweetSarcasmClassification + +__all__ = [ + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + "HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", +] diff --git a/mteb/tasks/Classification/ben/__init__.py b/mteb/tasks/Classification/ben/__init__.py index e69de29bb2..ae96c8b0b3 100644 --- a/mteb/tasks/Classification/ben/__init__.py +++ b/mteb/tasks/Classification/ben/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .BengaliDocumentClassification import BengaliDocumentClassification +from .BengaliHateSpeechClassification import BengaliHateSpeechClassification 
+from .BengaliSentimentAnalysis import BengaliSentimentAnalysis + +__all__ = [ + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", +] diff --git a/mteb/tasks/Classification/bul/__init__.py b/mteb/tasks/Classification/bul/__init__.py index e69de29bb2..5a5d0fbb3b 100644 --- a/mteb/tasks/Classification/bul/__init__.py +++ b/mteb/tasks/Classification/bul/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .BulgarianStoreReviewSentimentClassfication import ( + BulgarianStoreReviewSentimentClassfication, +) + +__all__ = ["BulgarianStoreReviewSentimentClassfication"] diff --git a/mteb/tasks/Classification/ces/__init__.py b/mteb/tasks/Classification/ces/__init__.py index e69de29bb2..e3f0adaa87 100644 --- a/mteb/tasks/Classification/ces/__init__.py +++ b/mteb/tasks/Classification/ces/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .CSFDCZMovieReviewSentimentClassification import ( + CSFDCZMovieReviewSentimentClassification, +) +from .CzechProductReviewSentimentClassification import ( + CzechProductReviewSentimentClassification, +) +from .CzechSoMeSentimentClassification import CzechSoMeSentimentClassification +from .CzechSubjectivityClassification import CzechSubjectivityClassification + +__all__ = [ + "CSFDCZMovieReviewSentimentClassification", + "CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", +] diff --git a/mteb/tasks/Classification/dan/__init__.py b/mteb/tasks/Classification/dan/__init__.py index e69de29bb2..f47c1a67fd 100644 --- a/mteb/tasks/Classification/dan/__init__.py +++ b/mteb/tasks/Classification/dan/__init__.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from .AngryTweetsClassification import AngryTweetsClassification +from .DanishPoliticalCommentsClassification import DanishPoliticalCommentsClassification +from .DdiscoCohesionClassification import DdiscoCohesionClassification +from 
.DKHateClassification import DKHateClassification +from .LccSentimentClassification import LccSentimentClassification + +__all__ = [ + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", +] diff --git a/mteb/tasks/Classification/deu/__init__.py b/mteb/tasks/Classification/deu/__init__.py index e69de29bb2..673aafd746 100644 --- a/mteb/tasks/Classification/deu/__init__.py +++ b/mteb/tasks/Classification/deu/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .GermanPoliticiansTwitterSentimentClassification import ( + GermanPoliticiansTwitterSentimentClassification, +) +from .TenKGnadClassification import TenKGnadClassification + +__all__ = ["TenKGnadClassification", "GermanPoliticiansTwitterSentimentClassification"] diff --git a/mteb/tasks/Classification/ell/__init__.py b/mteb/tasks/Classification/ell/__init__.py index e69de29bb2..6c9f66d55f 100644 --- a/mteb/tasks/Classification/ell/__init__.py +++ b/mteb/tasks/Classification/ell/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GreekLegalCodeClassification import GreekLegalCodeClassification + +__all__ = ["GreekLegalCodeClassification"] diff --git a/mteb/tasks/Classification/eng/__init__.py b/mteb/tasks/Classification/eng/__init__.py index e69de29bb2..01b996a96d 100644 --- a/mteb/tasks/Classification/eng/__init__.py +++ b/mteb/tasks/Classification/eng/__init__.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from .AmazonPolarityClassification import AmazonPolarityClassification +from .ArxivClassification import ArxivClassification +from .Banking77Classification import Banking77Classification +from .DBpediaClassification import DBpediaClassification +from .EmotionClassification import EmotionClassification +from .FinancialPhrasebankClassification import FinancialPhrasebankClassification +from .FrenkEnClassification import FrenkEnClassification +from 
.ImdbClassification import ImdbClassification +from .LegalBenchClassification import ( + CanadaTaxCourtOutcomesLegalBenchClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + 
CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + Diversity6LegalBenchClassification, + FunctionOfDecisionSectionLegalBenchClassification, + InsurancePolicyInterpretationLegalBenchClassification, + InternationalCitizenshipQuestionsLegalBenchClassification, + JCrewBlockerLegalBenchClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + 
LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MAUDLegalBenchClassification, + NYSJudicialEthicsLegalBenchClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PersonalJurisdictionLegalBenchClassification, + PROALegalBenchClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + TelemarketingSalesRuleLegalBenchClassification, + TextualismToolDictionariesLegalBenchClassification, + TextualismToolPlainLegalBenchClassification, + UCCVCommonLawLegalBenchClassification, + UnfairTOSLegalBenchClassification, +) +from .NewsClassification import NewsClassification +from .PatentClassification import PatentClassification +from .PoemSentimentClassification import PoemSentimentClassification +from .ToxicChatClassification import ToxicChatClassification +from 
.ToxicConversationsClassification import ToxicConversationsClassification +from .TweetSentimentExtractionClassification import ( + TweetSentimentExtractionClassification, +) +from .TweetTopicSingleClassification import TweetTopicSingleClassification +from .YahooAnswersTopicsClassification import YahooAnswersTopicsClassification +from .YelpReviewFullClassification import YelpReviewFullClassification + +__all__ = [ + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + "CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + "CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + "CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + 
"CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + "CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + "ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + "Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + 
"JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + "LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + "LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + "OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + "PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + 
"SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + "TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + "TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", +] diff --git a/mteb/tasks/Classification/est/__init__.py b/mteb/tasks/Classification/est/__init__.py new file mode 100644 index 0000000000..ba49956beb --- /dev/null +++ b/mteb/tasks/Classification/est/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .estonian_valence import EstonianValenceClassification + +__all__ = ["EstonianValenceClassification"] diff --git a/mteb/tasks/Classification/fas/__init__.py b/mteb/tasks/Classification/fas/__init__.py index e69de29bb2..1f35246357 100644 --- a/mteb/tasks/Classification/fas/__init__.py +++ b/mteb/tasks/Classification/fas/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PersianFoodSentimentClassification import PersianFoodSentimentClassification + +__all__ = ["PersianFoodSentimentClassification"] diff --git a/mteb/tasks/Classification/fil/__init__.py b/mteb/tasks/Classification/fil/__init__.py index e69de29bb2..bdc37399f2 100644 --- a/mteb/tasks/Classification/fil/__init__.py +++ b/mteb/tasks/Classification/fil/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .FilipinoHateSpeechClassification import 
FilipinoHateSpeechClassification +from .FilipinoShopeeReviewsClassification import FilipinoShopeeReviewsClassification + +__all__ = ["FilipinoHateSpeechClassification", "FilipinoShopeeReviewsClassification"] diff --git a/mteb/tasks/Classification/fin/__init__.py b/mteb/tasks/Classification/fin/__init__.py index e69de29bb2..c1d2cf4208 100644 --- a/mteb/tasks/Classification/fin/__init__.py +++ b/mteb/tasks/Classification/fin/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FinToxicityClassification import FinToxicityClassification + +__all__ = ["FinToxicityClassification"] diff --git a/mteb/tasks/Classification/fra/__init__.py b/mteb/tasks/Classification/fra/__init__.py index e69de29bb2..cff9213baf 100644 --- a/mteb/tasks/Classification/fra/__init__.py +++ b/mteb/tasks/Classification/fra/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .FrenchBookReviews import FrenchBookReviews +from .MovieReviewSentimentClassification import MovieReviewSentimentClassification + +__all__ = ["MovieReviewSentimentClassification", "FrenchBookReviews"] diff --git a/mteb/tasks/Classification/guj/__init__.py b/mteb/tasks/Classification/guj/__init__.py index e69de29bb2..211eb3bf86 100644 --- a/mteb/tasks/Classification/guj/__init__.py +++ b/mteb/tasks/Classification/guj/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GujaratiNewsClassification import GujaratiNewsClassification + +__all__ = ["GujaratiNewsClassification"] diff --git a/mteb/tasks/Classification/heb/__init__.py b/mteb/tasks/Classification/heb/__init__.py index e69de29bb2..1e6a039dd2 100644 --- a/mteb/tasks/Classification/heb/__init__.py +++ b/mteb/tasks/Classification/heb/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HebrewSentimentAnalysis import HebrewSentimentAnalysis + +__all__ = ["HebrewSentimentAnalysis"] diff --git a/mteb/tasks/Classification/hin/__init__.py b/mteb/tasks/Classification/hin/__init__.py index 
e69de29bb2..da7a5dee51 100644 --- a/mteb/tasks/Classification/hin/__init__.py +++ b/mteb/tasks/Classification/hin/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .HindiDiscourseClassification import HindiDiscourseClassification +from .SentimentAnalysisHindi import SentimentAnalysisHindi + +__all__ = ["HindiDiscourseClassification", "SentimentAnalysisHindi"] diff --git a/mteb/tasks/Classification/hrv/__init__.py b/mteb/tasks/Classification/hrv/__init__.py index e69de29bb2..07be541615 100644 --- a/mteb/tasks/Classification/hrv/__init__.py +++ b/mteb/tasks/Classification/hrv/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FrenkHrClassification import FrenkHrClassification + +__all__ = ["FrenkHrClassification"] diff --git a/mteb/tasks/Classification/ind/__init__.py b/mteb/tasks/Classification/ind/__init__.py index e69de29bb2..e1efdfa7ce 100644 --- a/mteb/tasks/Classification/ind/__init__.py +++ b/mteb/tasks/Classification/ind/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .IndonesianIdClickbaitClassification import IndonesianIdClickbaitClassification +from .IndonesianMongabayConservationClassification import ( + IndonesianMongabayConservationClassification, +) + +__all__ = [ + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", +] diff --git a/mteb/tasks/Classification/ita/__init__.py b/mteb/tasks/Classification/ita/__init__.py index e69de29bb2..4f987f9bfc 100644 --- a/mteb/tasks/Classification/ita/__init__.py +++ b/mteb/tasks/Classification/ita/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .ItaCaseholdClassification import ItaCaseholdClassification +from .ItalianLinguistAcceptabilityClassification import ( + ItalianLinguisticAcceptabilityClassification, +) + +__all__ = ["ItaCaseholdClassification", "ItalianLinguisticAcceptabilityClassification"] diff --git a/mteb/tasks/Classification/jav/__init__.py 
b/mteb/tasks/Classification/jav/__init__.py index e69de29bb2..417dae6e1f 100644 --- a/mteb/tasks/Classification/jav/__init__.py +++ b/mteb/tasks/Classification/jav/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .JavaneseIMDBClassification import JavaneseIMDBClassification + +__all__ = ["JavaneseIMDBClassification"] diff --git a/mteb/tasks/Classification/jpn/__init__.py b/mteb/tasks/Classification/jpn/__init__.py new file mode 100644 index 0000000000..6eca935f2f --- /dev/null +++ b/mteb/tasks/Classification/jpn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .WRIMEClassification import WRIMEClassification + +__all__ = ["WRIMEClassification"] diff --git a/mteb/tasks/Classification/kan/__init__.py b/mteb/tasks/Classification/kan/__init__.py index e69de29bb2..faa9d78ba1 100644 --- a/mteb/tasks/Classification/kan/__init__.py +++ b/mteb/tasks/Classification/kan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KannadaNewsClassification import KannadaNewsClassification + +__all__ = ["KannadaNewsClassification"] diff --git a/mteb/tasks/Classification/kat/__init__.py b/mteb/tasks/Classification/kat/__init__.py index e69de29bb2..c26b90d67d 100644 --- a/mteb/tasks/Classification/kat/__init__.py +++ b/mteb/tasks/Classification/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GeorgianSentimentClassification import GeorgianSentimentClassification + +__all__ = ["GeorgianSentimentClassification"] diff --git a/mteb/tasks/Classification/kor/__init__.py b/mteb/tasks/Classification/kor/__init__.py index e69de29bb2..bb6d78deb1 100644 --- a/mteb/tasks/Classification/kor/__init__.py +++ b/mteb/tasks/Classification/kor/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .KlueTC import KlueTC +from .KorFin import KorFin +from .KorHateClassification import KorHateClassification +from .KorSarcasmClassification import KorSarcasmClassification + +__all__ = 
["KorSarcasmClassification", "KorHateClassification", "KorFin", "KlueTC"] diff --git a/mteb/tasks/Classification/kur/__init__.py b/mteb/tasks/Classification/kur/__init__.py index e69de29bb2..e72b9aa6a0 100644 --- a/mteb/tasks/Classification/kur/__init__.py +++ b/mteb/tasks/Classification/kur/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KurdishSentimentClassification import KurdishSentimentClassification + +__all__ = ["KurdishSentimentClassification"] diff --git a/mteb/tasks/Classification/mal/__init__.py b/mteb/tasks/Classification/mal/__init__.py index e69de29bb2..2ba5994626 100644 --- a/mteb/tasks/Classification/mal/__init__.py +++ b/mteb/tasks/Classification/mal/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MalayalamNewsClassification import MalayalamNewsClassification + +__all__ = ["MalayalamNewsClassification"] diff --git a/mteb/tasks/Classification/mar/__init__.py b/mteb/tasks/Classification/mar/__init__.py index e69de29bb2..3d34c2776a 100644 --- a/mteb/tasks/Classification/mar/__init__.py +++ b/mteb/tasks/Classification/mar/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MarathiNewsClassification import MarathiNewsClassification + +__all__ = ["MarathiNewsClassification"] diff --git a/mteb/tasks/Classification/mkd/__init__.py b/mteb/tasks/Classification/mkd/__init__.py index e69de29bb2..cf4c140af8 100644 --- a/mteb/tasks/Classification/mkd/__init__.py +++ b/mteb/tasks/Classification/mkd/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .MacedonianTweetSentimentClassification import ( + MacedonianTweetSentimentClassification, +) + +__all__ = ["MacedonianTweetSentimentClassification"] diff --git a/mteb/tasks/Classification/multilingual/__init__.py b/mteb/tasks/Classification/multilingual/__init__.py index e69de29bb2..440ac7055b 100644 --- a/mteb/tasks/Classification/multilingual/__init__.py +++ b/mteb/tasks/Classification/multilingual/__init__.py @@ -0,0 +1,63 
@@ +from __future__ import annotations + +from .AfriSentiClassification import AfriSentiClassification +from .AfriSentiLangClassification import AfriSentiLangClassification +from .AmazonCounterfactualClassification import AmazonCounterfactualClassification +from .AmazonReviewsClassification import AmazonReviewsClassification +from .CataloniaTweetClassification import CataloniaTweetClassification +from .CyrillicTurkicLangClassification import CyrillicTurkicLangClassification +from .HinDialectClassification import HinDialectClassification +from .IndicLangClassification import IndicLangClassification +from .IndicNLPNewsClassification import IndicNLPNewsClassification +from .IndicSentimentClassification import IndicSentimentClassification +from .LanguageClassification import LanguageClassification +from .MasakhaNEWSClassification import MasakhaNEWSClassification +from .MassiveIntentClassification import MassiveIntentClassification +from .MassiveScenarioClassification import MassiveScenarioClassification +from .MTOPDomainClassification import MTOPDomainClassification +from .MTOPIntentClassification import MTOPIntentClassification +from .MultiHateClassification import MultiHateClassification +from .MultilingualSentimentClassification import MultilingualSentimentClassification +from .NaijaSenti import NaijaSenti +from .NordicLangClassification import NordicLangClassification +from .NusaParagraphEmotionClassification import NusaParagraphEmotionClassification +from .NusaParagraphTopicClassification import NusaParagraphTopicClassification +from .NusaXSenti import NusaXSentiClassification +from .ScalaClassification import ScalaClassification +from .SIB200Classification import SIB200Classification +from .SouthAfricanLangClassification import SouthAfricanLangClassification +from .SwissJudgementClassification import SwissJudgementClassification +from .TurkicClassification import TurkicClassification +from .TweetSentimentClassification import TweetSentimentClassification + 
+__all__ = [ + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + "MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + "AmazonCounterfactualClassification", + "MasakhaNEWSClassification", +] diff --git a/mteb/tasks/Classification/mya/__init__.py b/mteb/tasks/Classification/mya/__init__.py index e69de29bb2..a209ee2a3e 100644 --- a/mteb/tasks/Classification/mya/__init__.py +++ b/mteb/tasks/Classification/mya/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MyanmarNews import MyanmarNews + +__all__ = ["MyanmarNews"] diff --git a/mteb/tasks/Classification/nep/__init__.py b/mteb/tasks/Classification/nep/__init__.py index e69de29bb2..b18cae3209 100644 --- a/mteb/tasks/Classification/nep/__init__.py +++ b/mteb/tasks/Classification/nep/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .NepaliNewsClassification import NepaliNewsClassification + +__all__ = ["NepaliNewsClassification"] diff --git a/mteb/tasks/Classification/nld/__init__.py b/mteb/tasks/Classification/nld/__init__.py index e69de29bb2..17fae09dda 100644 --- a/mteb/tasks/Classification/nld/__init__.py +++ b/mteb/tasks/Classification/nld/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from 
.DutchBookReviewSentimentClassification import ( + DutchBookReviewSentimentClassification, +) + +__all__ = ["DutchBookReviewSentimentClassification"] diff --git a/mteb/tasks/Classification/nob/__init__.py b/mteb/tasks/Classification/nob/__init__.py index e69de29bb2..5a545c2346 100644 --- a/mteb/tasks/Classification/nob/__init__.py +++ b/mteb/tasks/Classification/nob/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .NoRecClassification import NoRecClassification +from .NorwegianParliamentClassification import NorwegianParliamentClassification + +__all__ = ["NorwegianParliamentClassification", "NoRecClassification"] diff --git a/mteb/tasks/Classification/ory/__init__.py b/mteb/tasks/Classification/ory/__init__.py new file mode 100644 index 0000000000..775f171ff0 --- /dev/null +++ b/mteb/tasks/Classification/ory/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .OdiaNewsClassification import OdiaNewsClassification + +__all__ = ["OdiaNewsClassification"] diff --git a/mteb/tasks/Classification/pan/__init__.py b/mteb/tasks/Classification/pan/__init__.py new file mode 100644 index 0000000000..cfedf3155f --- /dev/null +++ b/mteb/tasks/Classification/pan/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PunjabiNewsClassification import PunjabiNewsClassification + +__all__ = ["PunjabiNewsClassification"] diff --git a/mteb/tasks/Classification/pol/__init__.py b/mteb/tasks/Classification/pol/__init__.py index e69de29bb2..a3531bea1b 100644 --- a/mteb/tasks/Classification/pol/__init__.py +++ b/mteb/tasks/Classification/pol/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .PolishClassification import ( + AllegroReviewsClassification, + CbdClassification, + PacClassification, + PolEmo2InClassification, + PolEmo2OutClassification, +) + +__all__ = [ + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", +] 
diff --git a/mteb/tasks/Classification/por/__init__.py b/mteb/tasks/Classification/por/__init__.py index e69de29bb2..4d202d5117 100644 --- a/mteb/tasks/Classification/por/__init__.py +++ b/mteb/tasks/Classification/por/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HateSpeechPortugueseClassification import HateSpeechPortugueseClassification + +__all__ = ["HateSpeechPortugueseClassification"] diff --git a/mteb/tasks/Classification/ron/__init__.py b/mteb/tasks/Classification/ron/__init__.py index e69de29bb2..f8473c0c9c 100644 --- a/mteb/tasks/Classification/ron/__init__.py +++ b/mteb/tasks/Classification/ron/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .Moroco import Moroco +from .RomanianReviewsSentiment import RomanianReviewsSentiment +from .RomanianSentimentClassification import RomanianSentimentClassification + +__all__ = ["RomanianReviewsSentiment", "Moroco", "RomanianSentimentClassification"] diff --git a/mteb/tasks/Classification/rus/__init__.py b/mteb/tasks/Classification/rus/__init__.py index e69de29bb2..2fada5e4e7 100644 --- a/mteb/tasks/Classification/rus/__init__.py +++ b/mteb/tasks/Classification/rus/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from .GeoreviewClassification import GeoreviewClassification +from .HeadlineClassification import HeadlineClassification +from .InappropriatenessClassification import InappropriatenessClassification +from .KinopoiskClassification import KinopoiskClassification +from .RuReviewsClassification import RuReviewsClassification +from .RuSciBenchGRNTIClassification import RuSciBenchGRNTIClassification +from .RuSciBenchOECDClassification import RuSciBenchOECDClassification + +__all__ = [ + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", +] diff --git 
a/mteb/tasks/Classification/san/__init__.py b/mteb/tasks/Classification/san/__init__.py index e69de29bb2..2ef456ae7d 100644 --- a/mteb/tasks/Classification/san/__init__.py +++ b/mteb/tasks/Classification/san/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SanskritShlokasClassification import SanskritShlokasClassification + +__all__ = ["SanskritShlokasClassification"] diff --git a/mteb/tasks/Classification/sin/__init__.py b/mteb/tasks/Classification/sin/__init__.py index e69de29bb2..ee4b6c1cd3 100644 --- a/mteb/tasks/Classification/sin/__init__.py +++ b/mteb/tasks/Classification/sin/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SinhalaNewsClassification import SinhalaNewsClassification +from .SinhalaNewsSourceClassification import SinhalaNewsSourceClassification + +__all__ = ["SinhalaNewsSourceClassification", "SinhalaNewsClassification"] diff --git a/mteb/tasks/Classification/slk/__init__.py b/mteb/tasks/Classification/slk/__init__.py new file mode 100644 index 0000000000..d58f1a2b33 --- /dev/null +++ b/mteb/tasks/Classification/slk/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .CSFDSKMovieReviewSentimentClassification import ( + CSFDSKMovieReviewSentimentClassification, +) +from .SlovakHateSpeechClassification import SlovakHateSpeechClassification + +__all__ = ["CSFDSKMovieReviewSentimentClassification", "SlovakHateSpeechClassification"] diff --git a/mteb/tasks/Classification/slv/__init__.py b/mteb/tasks/Classification/slv/__init__.py index e69de29bb2..57605d8b60 100644 --- a/mteb/tasks/Classification/slv/__init__.py +++ b/mteb/tasks/Classification/slv/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FrenkSlClassification import FrenkSlClassification + +__all__ = ["FrenkSlClassification"] diff --git a/mteb/tasks/Classification/spa/__init__.py b/mteb/tasks/Classification/spa/__init__.py index e69de29bb2..3150b2ddcd 100644 --- 
a/mteb/tasks/Classification/spa/__init__.py +++ b/mteb/tasks/Classification/spa/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SpanishNewsClassification import SpanishNewsClassification +from .SpanishSentimentClassification import SpanishSentimentClassification + +__all__ = ["SpanishNewsClassification", "SpanishSentimentClassification"] diff --git a/mteb/tasks/Classification/ssw/__init__.py b/mteb/tasks/Classification/ssw/__init__.py index e69de29bb2..8de706169c 100644 --- a/mteb/tasks/Classification/ssw/__init__.py +++ b/mteb/tasks/Classification/ssw/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SiswatiNewsClassification import SiswatiNewsClassification + +__all__ = ["SiswatiNewsClassification"] diff --git a/mteb/tasks/Classification/svk/__init__.py b/mteb/tasks/Classification/svk/__init__.py index e69de29bb2..9c0c6608e4 100644 --- a/mteb/tasks/Classification/svk/__init__.py +++ b/mteb/tasks/Classification/svk/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .SlovakMovieReviewSentimentClassification import ( + SlovakMovieReviewSentimentClassification, +) + +__all__ = ["SlovakMovieReviewSentimentClassification"] diff --git a/mteb/tasks/Classification/swa/__init__.py b/mteb/tasks/Classification/swa/__init__.py index e69de29bb2..f9b04ebc77 100644 --- a/mteb/tasks/Classification/swa/__init__.py +++ b/mteb/tasks/Classification/swa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SwahiliNewsClassification import SwahiliNewsClassification + +__all__ = ["SwahiliNewsClassification"] diff --git a/mteb/tasks/Classification/swe/__init__.py b/mteb/tasks/Classification/swe/__init__.py index e69de29bb2..73c69f1fb2 100644 --- a/mteb/tasks/Classification/swe/__init__.py +++ b/mteb/tasks/Classification/swe/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .DalajClassification import DalajClassification +from .SwedishSentimentClassification import 
SwedishSentimentClassification +from .SweRecClassification import SweRecClassification + +__all__ = [ + "DalajClassification", + "SweRecClassification", + "SwedishSentimentClassification", +] diff --git a/mteb/tasks/Classification/tam/__init__.py b/mteb/tasks/Classification/tam/__init__.py index e69de29bb2..ac93b4e50e 100644 --- a/mteb/tasks/Classification/tam/__init__.py +++ b/mteb/tasks/Classification/tam/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TamilNewsClassification import TamilNewsClassification + +__all__ = ["TamilNewsClassification"] diff --git a/mteb/tasks/Classification/tel/__init__.py b/mteb/tasks/Classification/tel/__init__.py index e69de29bb2..ccd1ef814a 100644 --- a/mteb/tasks/Classification/tel/__init__.py +++ b/mteb/tasks/Classification/tel/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TeluguAndhraJyotiNewsClassification import TeluguAndhraJyotiNewsClassification + +__all__ = ["TeluguAndhraJyotiNewsClassification"] diff --git a/mteb/tasks/Classification/tha/__init__.py b/mteb/tasks/Classification/tha/__init__.py index e69de29bb2..16df75e134 100644 --- a/mteb/tasks/Classification/tha/__init__.py +++ b/mteb/tasks/Classification/tha/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .WisesightSentimentClassification import WisesightSentimentClassification +from .WongnaiReviewsClassification import WongnaiReviewsClassification + +__all__ = ["WisesightSentimentClassification", "WongnaiReviewsClassification"] diff --git a/mteb/tasks/Classification/tsn/__init__.py b/mteb/tasks/Classification/tsn/__init__.py index e69de29bb2..f767bb4aae 100644 --- a/mteb/tasks/Classification/tsn/__init__.py +++ b/mteb/tasks/Classification/tsn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TswanaNewsClassification import TswanaNewsClassification + +__all__ = ["TswanaNewsClassification"] diff --git a/mteb/tasks/Classification/tur/__init__.py 
b/mteb/tasks/Classification/tur/__init__.py index 8b13789179..c5859194ad 100644 --- a/mteb/tasks/Classification/tur/__init__.py +++ b/mteb/tasks/Classification/tur/__init__.py @@ -1 +1,9 @@ +from __future__ import annotations +from .TurkishMovieSentimentClassification import TurkishMovieSentimentClassification +from .TurkishProductSentimentClassification import TurkishProductSentimentClassification + +__all__ = [ + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", +] diff --git a/mteb/tasks/Classification/ukr/__init__.py b/mteb/tasks/Classification/ukr/__init__.py index e69de29bb2..094a590c33 100644 --- a/mteb/tasks/Classification/ukr/__init__.py +++ b/mteb/tasks/Classification/ukr/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .UkrFormalityClassification import UkrFormalityClassification + +__all__ = ["UkrFormalityClassification"] diff --git a/mteb/tasks/Classification/urd/__init__.py b/mteb/tasks/Classification/urd/__init__.py index e69de29bb2..7d5b5eab14 100644 --- a/mteb/tasks/Classification/urd/__init__.py +++ b/mteb/tasks/Classification/urd/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .UrduRomanSentimentClassification import UrduRomanSentimentClassification + +__all__ = ["UrduRomanSentimentClassification"] diff --git a/mteb/tasks/Classification/vie/__init__.py b/mteb/tasks/Classification/vie/__init__.py index e69de29bb2..bbf7e9d2de 100644 --- a/mteb/tasks/Classification/vie/__init__.py +++ b/mteb/tasks/Classification/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieStudentFeedbackClassification import VieStudentFeedbackClassification + +__all__ = ["VieStudentFeedbackClassification"] diff --git a/mteb/tasks/Classification/zho/__init__.py b/mteb/tasks/Classification/zho/__init__.py index e69de29bb2..a0b55bf883 100644 --- a/mteb/tasks/Classification/zho/__init__.py +++ b/mteb/tasks/Classification/zho/__init__.py @@ -0,0 +1,21 @@ +from __future__ 
import annotations + +from .CMTEBClassification import ( + IFlyTek, + JDReview, + MultilingualSentiment, + OnlineShopping, + TNews, + Waimai, +) +from .YueOpenriceReviewClassification import YueOpenriceReviewClassification + +__all__ = [ + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", +] diff --git a/mteb/tasks/Classification/zul/__init__.py b/mteb/tasks/Classification/zul/__init__.py index e69de29bb2..1217351e0e 100644 --- a/mteb/tasks/Classification/zul/__init__.py +++ b/mteb/tasks/Classification/zul/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .IsiZuluNewsClassification import IsiZuluNewsClassification + +__all__ = ["IsiZuluNewsClassification"] diff --git a/mteb/tasks/Clustering/__init__.py b/mteb/tasks/Clustering/__init__.py index 014796a4cb..f6b683669f 100644 --- a/mteb/tasks/Clustering/__init__.py +++ b/mteb/tasks/Clustering/__init__.py @@ -1,45 +1,179 @@ from __future__ import annotations -from .deu.BlurbsClusteringP2P import * -from .deu.BlurbsClusteringS2S import * -from .deu.TenKGnadClusteringP2P import * -from .deu.TenKGnadClusteringS2S import * -from .eng.ArxivClusteringP2P import * -from .eng.ArxivClusteringS2S import * -from .eng.ArXivHierarchicalClustering import * -from .eng.BigPatentClustering import * -from .eng.BiorxivClusteringP2P import * -from .eng.BiorxivClusteringS2S import * -from .eng.MedrxivClusteringP2P import * -from .eng.MedrxivClusteringS2S import * -from .eng.RedditClustering import * -from .eng.RedditClusteringP2P import * -from .eng.StackExchangeClustering import * -from .eng.StackExchangeClusteringP2P import * -from .eng.TwentyNewsgroupsClustering import * -from .eng.WikiCitiesClustering import * -from .fra.AlloProfClusteringP2P import * -from .fra.AlloProfClusteringS2S import * -from .fra.HALClusteringS2S import * -from .jpn.LivedoorNewsClustering import * -from .jpn.MewsC16JaClustering import * -from 
.multilingual.IndicReviewsClusteringP2P import * -from .multilingual.MasakhaNEWSClusteringP2P import * -from .multilingual.MasakhaNEWSClusteringS2S import * -from .multilingual.MLSUMClusteringP2P import * -from .multilingual.MLSUMClusteringS2S import * -from .multilingual.SIB200ClusteringS2S import * -from .multilingual.WikiClusteringP2P import * -from .nob.snl_clustering import * -from .nob.SNLHierarchicalClustering import * -from .nob.vg_clustering import * -from .nob.VGHierarchicalClustering import * -from .pol.PolishClustering import * -from .rom.RomaniBibleClustering import * -from .rus.GeoreviewClusteringP2P import * -from .rus.RuSciBenchGRNTIClusteringP2P import * -from .rus.RuSciBenchOECDClusteringP2P import * -from .spa.SpanishNewsClusteringP2P import * -from .swe.swedn_clustering import * -from .swe.SwednClustering import * -from .zho.CMTEBClustering import * +from .deu import ( + BlurbsClusteringP2P, + BlurbsClusteringP2PFast, + BlurbsClusteringS2S, + BlurbsClusteringS2SFast, + TenKGnadClusteringP2P, + TenKGnadClusteringP2PFast, + TenKGnadClusteringS2S, + TenKGnadClusteringS2SFast, +) +from .eng import ( + ArxivClusteringP2P, + ArxivClusteringP2PFast, + ArxivClusteringS2S, + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, + BigPatentClustering, + BigPatentClusteringFast, + BiorxivClusteringP2P, + BiorxivClusteringP2PFast, + BiorxivClusteringS2S, + BiorxivClusteringS2SFast, + MedrxivClusteringP2P, + MedrxivClusteringP2PFast, + MedrxivClusteringS2S, + MedrxivClusteringS2SFast, + RedditClustering, + RedditClusteringP2P, + RedditFastClusteringP2P, + RedditFastClusteringS2S, + StackExchangeClustering, + StackExchangeClusteringFast, + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, + WikiCitiesClustering, +) +from .fra import ( + AlloProfClusteringP2P, + AlloProfClusteringP2PFast, + AlloProfClusteringS2S, + AlloProfClusteringS2SFast, + HALClusteringS2S, + 
HALClusteringS2SFast, +) +from .jpn import LivedoorNewsClustering, LivedoorNewsClusteringv2, MewsC16JaClustering +from .multilingual import ( + IndicReviewsClusteringP2P, + MasakhaNEWSClusteringP2P, + MasakhaNEWSClusteringS2S, + MLSUMClusteringP2P, + MLSUMClusteringP2PFast, + MLSUMClusteringS2S, + MLSUMClusteringS2SFast, + SIB200ClusteringFast, + WikiClusteringFastP2P, + WikiClusteringP2P, +) +from .nob import ( + SNLClustering, + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, + VGClustering, + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, +) +from .pol import ( + EightTagsClustering, + EightTagsClusteringFast, + PlscClusteringP2P, + PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, +) +from .rom import RomaniBibleClustering +from .rus import ( + GeoreviewClusteringP2P, + RuSciBenchGRNTIClusteringP2P, + RuSciBenchOECDClusteringP2P, +) +from .spa import SpanishNewsClusteringP2P +from .swe import SwednClustering, SwednClusteringFastS2S, SwednClusteringP2P +from .zho import ( + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", + "SpanishNewsClusteringP2P", + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", + "SwednClustering", + "SwednClusteringFastS2S", + "SwednClusteringP2P", + "VGClustering", + "SNLHierarchicalClusteringP2P", + "SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + "VGHierarchicalClusteringS2S", + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + 
"TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", + "RomaniBibleClustering", + "MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + "StackExchangeClustering", + "StackExchangeClusteringFast", + "RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", + "LivedoorNewsClustering", + "LivedoorNewsClusteringv2", + "MewsC16JaClustering", + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/deu/__init__.py b/mteb/tasks/Clustering/deu/__init__.py index e69de29bb2..67ebc77c34 100644 --- a/mteb/tasks/Clustering/deu/__init__.py +++ b/mteb/tasks/Clustering/deu/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .BlurbsClusteringP2P import BlurbsClusteringP2P, BlurbsClusteringP2PFast +from .BlurbsClusteringS2S import BlurbsClusteringS2S, BlurbsClusteringS2SFast +from .TenKGnadClusteringP2P import TenKGnadClusteringP2P, 
TenKGnadClusteringP2PFast +from .TenKGnadClusteringS2S import TenKGnadClusteringS2S, TenKGnadClusteringS2SFast + +__all__ = [ + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + "TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/eng/__init__.py b/mteb/tasks/Clustering/eng/__init__.py index e69de29bb2..39773845ba 100644 --- a/mteb/tasks/Clustering/eng/__init__.py +++ b/mteb/tasks/Clustering/eng/__init__.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from .ArxivClusteringP2P import ArxivClusteringP2P, ArxivClusteringP2PFast +from .ArxivClusteringS2S import ArxivClusteringS2S +from .ArXivHierarchicalClustering import ( + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, +) +from .BigPatentClustering import BigPatentClustering, BigPatentClusteringFast +from .BiorxivClusteringP2P import BiorxivClusteringP2P, BiorxivClusteringP2PFast +from .BiorxivClusteringS2S import BiorxivClusteringS2S, BiorxivClusteringS2SFast +from .MedrxivClusteringP2P import MedrxivClusteringP2P, MedrxivClusteringP2PFast +from .MedrxivClusteringS2S import MedrxivClusteringS2S, MedrxivClusteringS2SFast +from .RedditClustering import RedditClustering, RedditFastClusteringS2S +from .RedditClusteringP2P import RedditClusteringP2P, RedditFastClusteringP2P +from .StackExchangeClustering import ( + StackExchangeClustering, + StackExchangeClusteringFast, +) +from .StackExchangeClusteringP2P import ( + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, +) +from .TwentyNewsgroupsClustering import ( + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, +) +from .WikiCitiesClustering import WikiCitiesClustering + +__all__ = [ + "MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + "StackExchangeClustering", + "StackExchangeClusteringFast", + 
"RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", +] diff --git a/mteb/tasks/Clustering/fra/__init__.py b/mteb/tasks/Clustering/fra/__init__.py index e69de29bb2..cdc9f26fd4 100644 --- a/mteb/tasks/Clustering/fra/__init__.py +++ b/mteb/tasks/Clustering/fra/__init__.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from .AlloProfClusteringP2P import AlloProfClusteringP2P, AlloProfClusteringP2PFast +from .AlloProfClusteringS2S import AlloProfClusteringS2S, AlloProfClusteringS2SFast +from .HALClusteringS2S import HALClusteringS2S, HALClusteringS2SFast + +__all__ = [ + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", +] diff --git a/mteb/tasks/Clustering/jpn/__init__.py b/mteb/tasks/Clustering/jpn/__init__.py index e69de29bb2..033d1c6173 100644 --- a/mteb/tasks/Clustering/jpn/__init__.py +++ b/mteb/tasks/Clustering/jpn/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .LivedoorNewsClustering import LivedoorNewsClustering, LivedoorNewsClusteringv2 +from .MewsC16JaClustering import MewsC16JaClustering + +__all__ = ["LivedoorNewsClustering", "LivedoorNewsClusteringv2", "MewsC16JaClustering"] diff --git a/mteb/tasks/Clustering/multilingual/__init__.py b/mteb/tasks/Clustering/multilingual/__init__.py index e69de29bb2..b6e78fa790 100644 --- a/mteb/tasks/Clustering/multilingual/__init__.py +++ 
b/mteb/tasks/Clustering/multilingual/__init__.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from .IndicReviewsClusteringP2P import IndicReviewsClusteringP2P +from .MasakhaNEWSClusteringP2P import MasakhaNEWSClusteringP2P +from .MasakhaNEWSClusteringS2S import MasakhaNEWSClusteringS2S +from .MLSUMClusteringP2P import MLSUMClusteringP2P, MLSUMClusteringP2PFast +from .MLSUMClusteringS2S import MLSUMClusteringS2S, MLSUMClusteringS2SFast +from .SIB200ClusteringS2S import SIB200ClusteringFast +from .WikiClusteringP2P import WikiClusteringFastP2P, WikiClusteringP2P + +__all__ = [ + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", +] diff --git a/mteb/tasks/Clustering/nob/__init__.py b/mteb/tasks/Clustering/nob/__init__.py index e69de29bb2..1ebda31080 100644 --- a/mteb/tasks/Clustering/nob/__init__.py +++ b/mteb/tasks/Clustering/nob/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .snl_clustering import SNLClustering +from .SNLHierarchicalClustering import ( + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, +) +from .vg_clustering import VGClustering +from .VGHierarchicalClustering import ( + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, +) + +__all__ = [ + "VGClustering", + "SNLHierarchicalClusteringP2P", + "SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + "VGHierarchicalClusteringS2S", +] diff --git a/mteb/tasks/Clustering/pol/__init__.py b/mteb/tasks/Clustering/pol/__init__.py index e69de29bb2..6b19d6cdb6 100644 --- a/mteb/tasks/Clustering/pol/__init__.py +++ b/mteb/tasks/Clustering/pol/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from .PolishClustering import ( + EightTagsClustering, + EightTagsClusteringFast, + PlscClusteringP2P, 
+ PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, +) + +__all__ = [ + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", +] diff --git a/mteb/tasks/Clustering/rom/__init__.py b/mteb/tasks/Clustering/rom/__init__.py new file mode 100644 index 0000000000..5ab88efd50 --- /dev/null +++ b/mteb/tasks/Clustering/rom/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RomaniBibleClustering import RomaniBibleClustering + +__all__ = ["RomaniBibleClustering"] diff --git a/mteb/tasks/Clustering/rus/__init__.py b/mteb/tasks/Clustering/rus/__init__.py index e69de29bb2..295b1663fe 100644 --- a/mteb/tasks/Clustering/rus/__init__.py +++ b/mteb/tasks/Clustering/rus/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .GeoreviewClusteringP2P import GeoreviewClusteringP2P +from .RuSciBenchGRNTIClusteringP2P import RuSciBenchGRNTIClusteringP2P +from .RuSciBenchOECDClusteringP2P import RuSciBenchOECDClusteringP2P + +__all__ = [ + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", +] diff --git a/mteb/tasks/Clustering/spa/__init__.py b/mteb/tasks/Clustering/spa/__init__.py index e69de29bb2..c67454d7f3 100644 --- a/mteb/tasks/Clustering/spa/__init__.py +++ b/mteb/tasks/Clustering/spa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SpanishNewsClusteringP2P import SpanishNewsClusteringP2P + +__all__ = ["SpanishNewsClusteringP2P"] diff --git a/mteb/tasks/Clustering/swe/__init__.py b/mteb/tasks/Clustering/swe/__init__.py index e69de29bb2..47a7599596 100644 --- a/mteb/tasks/Clustering/swe/__init__.py +++ b/mteb/tasks/Clustering/swe/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .swedn_clustering import SwednClustering +from .SwednClustering import SwednClusteringFastS2S, SwednClusteringP2P + +__all__ = ["SwednClustering", 
"SwednClusteringFastS2S", "SwednClusteringP2P"] diff --git a/mteb/tasks/Clustering/zho/__init__.py b/mteb/tasks/Clustering/zho/__init__.py index e69de29bb2..8aa2618aa1 100644 --- a/mteb/tasks/Clustering/zho/__init__.py +++ b/mteb/tasks/Clustering/zho/__init__.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from .CMTEBClustering import ( + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", +] diff --git a/mteb/tasks/InstructionReranking/__init__.py b/mteb/tasks/InstructionReranking/__init__.py index f5e812247d..82523ac4ca 100644 --- a/mteb/tasks/InstructionReranking/__init__.py +++ b/mteb/tasks/InstructionReranking/__init__.py @@ -1,6 +1,16 @@ from __future__ import annotations -from .eng.Core17InstructionRetrieval import * -from .eng.News21InstructionRetrieval import * -from .eng.Robust04InstructionRetrieval import * -from .multilingual.mFollowIR import * +from .eng import ( + Core17InstructionRetrieval, + News21InstructionRetrieval, + Robust04InstructionRetrieval, +) +from .multilingual import mFollowIR, mFollowIRCrossLingual + +__all__ = [ + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", + "mFollowIR", + "mFollowIRCrossLingual", +] diff --git a/mteb/tasks/InstructionReranking/eng/__init__.py b/mteb/tasks/InstructionReranking/eng/__init__.py index e69de29bb2..ad30bb4ea2 100644 --- a/mteb/tasks/InstructionReranking/eng/__init__.py +++ b/mteb/tasks/InstructionReranking/eng/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .Core17InstructionRetrieval import Core17InstructionRetrieval +from .News21InstructionRetrieval import 
News21InstructionRetrieval +from .Robust04InstructionRetrieval import Robust04InstructionRetrieval + +__all__ = [ + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", +] diff --git a/mteb/tasks/InstructionReranking/multilingual/__init__.py b/mteb/tasks/InstructionReranking/multilingual/__init__.py index e69de29bb2..a559987717 100644 --- a/mteb/tasks/InstructionReranking/multilingual/__init__.py +++ b/mteb/tasks/InstructionReranking/multilingual/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .mFollowIR import mFollowIR, mFollowIRCrossLingual + +__all__ = ["mFollowIR", "mFollowIRCrossLingual"] diff --git a/mteb/tasks/InstructionRetrieval/__init__.py b/mteb/tasks/InstructionRetrieval/__init__.py index c8454858dd..60d78aca14 100644 --- a/mteb/tasks/InstructionRetrieval/__init__.py +++ b/mteb/tasks/InstructionRetrieval/__init__.py @@ -1,3 +1,5 @@ from __future__ import annotations -from .eng.InstructIR import * +from .eng import InstructIR + +__all__ = ["InstructIR"] diff --git a/mteb/tasks/InstructionRetrieval/eng/__init__.py b/mteb/tasks/InstructionRetrieval/eng/__init__.py index e69de29bb2..83928dd647 100644 --- a/mteb/tasks/InstructionRetrieval/eng/__init__.py +++ b/mteb/tasks/InstructionRetrieval/eng/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .InstructIR import InstructIR + +__all__ = ["InstructIR"] diff --git a/mteb/tasks/InstructionRetrieval/multilingual/__init__.py b/mteb/tasks/InstructionRetrieval/multilingual/__init__.py new file mode 100644 index 0000000000..4d21ee8507 --- /dev/null +++ b/mteb/tasks/InstructionRetrieval/multilingual/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +__all__ = [] diff --git a/mteb/tasks/MultiLabelClassification/__init__.py b/mteb/tasks/MultiLabelClassification/__init__.py index 0cf8c1bf6a..b27c4f8111 100644 --- a/mteb/tasks/MultiLabelClassification/__init__.py +++ b/mteb/tasks/MultiLabelClassification/__init__.py 
@@ -1,8 +1,16 @@ from __future__ import annotations -from .kor.KorHateSpeechMLClassification import * -from .mlt.MalteseNewsClassification import * -from .multilingual.MultiEURLEXMultilabelClassification import * -from .por.BrazilianToxicTweetsClassification import * -from .rus.CEDRClassification import * -from .rus.SensitiveTopicsClassification import * +from .kor import KorHateSpeechMLClassification +from .mlt import MalteseNewsClassification +from .multilingual import MultiEURLEXMultilabelClassification +from .por import BrazilianToxicTweetsClassification +from .rus import CEDRClassification, SensitiveTopicsClassification + +__all__ = [ + "BrazilianToxicTweetsClassification", + "MalteseNewsClassification", + "KorHateSpeechMLClassification", + "MultiEURLEXMultilabelClassification", + "SensitiveTopicsClassification", + "CEDRClassification", +] diff --git a/mteb/tasks/MultiLabelClassification/kor/__init__.py b/mteb/tasks/MultiLabelClassification/kor/__init__.py index e69de29bb2..02efbdca47 100644 --- a/mteb/tasks/MultiLabelClassification/kor/__init__.py +++ b/mteb/tasks/MultiLabelClassification/kor/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KorHateSpeechMLClassification import KorHateSpeechMLClassification + +__all__ = ["KorHateSpeechMLClassification"] diff --git a/mteb/tasks/MultiLabelClassification/mlt/__init__.py b/mteb/tasks/MultiLabelClassification/mlt/__init__.py index e69de29bb2..c1aa8a2ff2 100644 --- a/mteb/tasks/MultiLabelClassification/mlt/__init__.py +++ b/mteb/tasks/MultiLabelClassification/mlt/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MalteseNewsClassification import MalteseNewsClassification + +__all__ = ["MalteseNewsClassification"] diff --git a/mteb/tasks/MultiLabelClassification/multilingual/__init__.py b/mteb/tasks/MultiLabelClassification/multilingual/__init__.py index e69de29bb2..235cac1550 100644 --- a/mteb/tasks/MultiLabelClassification/multilingual/__init__.py +++ 
b/mteb/tasks/MultiLabelClassification/multilingual/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MultiEURLEXMultilabelClassification import MultiEURLEXMultilabelClassification + +__all__ = ["MultiEURLEXMultilabelClassification"] diff --git a/mteb/tasks/MultiLabelClassification/por/__init__.py b/mteb/tasks/MultiLabelClassification/por/__init__.py index e69de29bb2..0e30038e8b 100644 --- a/mteb/tasks/MultiLabelClassification/por/__init__.py +++ b/mteb/tasks/MultiLabelClassification/por/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .BrazilianToxicTweetsClassification import BrazilianToxicTweetsClassification + +__all__ = ["BrazilianToxicTweetsClassification"] diff --git a/mteb/tasks/MultiLabelClassification/rus/__init__.py b/mteb/tasks/MultiLabelClassification/rus/__init__.py index e69de29bb2..18b6192a0b 100644 --- a/mteb/tasks/MultiLabelClassification/rus/__init__.py +++ b/mteb/tasks/MultiLabelClassification/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .CEDRClassification import CEDRClassification +from .SensitiveTopicsClassification import SensitiveTopicsClassification + +__all__ = ["SensitiveTopicsClassification", "CEDRClassification"] diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index 1193728659..03bf5a90a8 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -1,26 +1,58 @@ from __future__ import annotations -from .ara.ArEntail import * -from .ces.CTKFactsNLI import * -from .deu.FalseFriendsDeEnPC import * -from .eng.LegalBenchPC import * -from .eng.SprintDuplicateQuestionsPC import * -from .eng.TwitterSemEval2015PC import * -from .eng.TwitterURLCorpusPC import * -from .fas.FarsTail import * -from .hye.ArmenianParaphrasePC import * -from .ind.IndoNLI import * -from .kor.KlueNLI import * -from .multilingual.IndicXnliPairClassification import ( - IndicXnliPairClassification as 
IndicXnliPairClassification, +from .ara import ArEntail +from .ces import CTKFactsNLI +from .deu import FalseFriendsDeEnPC +from .eng import ( + LegalBenchPC, + SprintDuplicateQuestionsPC, + TwitterSemEval2015PC, + TwitterURLCorpusPC, ) -from .multilingual.OpusparcusPC import * -from .multilingual.PawsXPairClassification import * -from .multilingual.RTE3 import * -from .multilingual.XNLI import * -from .multilingual.XStance import * -from .pol.PolishPC import * -from .por.Assin2RTE import * -from .por.SickBrPC import * -from .rus.TERRa import * -from .zho.CMTEBPairClassification import * +from .fas import FarsTail +from .hye import ArmenianParaphrasePC +from .ind import IndoNLI +from .kor import KlueNLI +from .multilingual import ( + RTE3, + XNLI, + XNLIV2, + IndicXnliPairClassification, + OpusparcusPC, + PawsXPairClassification, + XStance, +) +from .pol import CdscePC, PpcPC, PscPC, SickePLPC +from .por import Assin2RTE, SickBrPC +from .rus import TERRa +from .zho import Cmnli, Ocnli + +__all__ = [ + "Cmnli", + "Ocnli", + "Assin2RTE", + "SickBrPC", + "CdscePC", + "PpcPC", + "PscPC", + "SickePLPC", + "IndoNLI", + "FalseFriendsDeEnPC", + "ArEntail", + "ArmenianParaphrasePC", + "CTKFactsNLI", + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", + "FarsTail", + "KlueNLI", + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", + "TERRa", +] diff --git a/mteb/tasks/PairClassification/ara/__init__.py b/mteb/tasks/PairClassification/ara/__init__.py new file mode 100644 index 0000000000..9619569bca --- /dev/null +++ b/mteb/tasks/PairClassification/ara/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .ArEntail import ArEntail + +__all__ = ["ArEntail"] diff --git a/mteb/tasks/PairClassification/ces/__init__.py b/mteb/tasks/PairClassification/ces/__init__.py new file mode 100644 index 0000000000..82a0417518 --- /dev/null +++ 
b/mteb/tasks/PairClassification/ces/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CTKFactsNLI import CTKFactsNLI + +__all__ = ["CTKFactsNLI"] diff --git a/mteb/tasks/PairClassification/deu/__init__.py b/mteb/tasks/PairClassification/deu/__init__.py index 8b13789179..27c07a9982 100644 --- a/mteb/tasks/PairClassification/deu/__init__.py +++ b/mteb/tasks/PairClassification/deu/__init__.py @@ -1 +1,5 @@ +from __future__ import annotations +from .FalseFriendsDeEnPC import FalseFriendsDeEnPC + +__all__ = ["FalseFriendsDeEnPC"] diff --git a/mteb/tasks/PairClassification/eng/__init__.py b/mteb/tasks/PairClassification/eng/__init__.py index e69de29bb2..5fdb9d7080 100644 --- a/mteb/tasks/PairClassification/eng/__init__.py +++ b/mteb/tasks/PairClassification/eng/__init__.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from .LegalBenchPC import LegalBenchPC +from .SprintDuplicateQuestionsPC import SprintDuplicateQuestionsPC +from .TwitterSemEval2015PC import TwitterSemEval2015PC +from .TwitterURLCorpusPC import TwitterURLCorpusPC + +__all__ = [ + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", +] diff --git a/mteb/tasks/PairClassification/fas/__init__.py b/mteb/tasks/PairClassification/fas/__init__.py new file mode 100644 index 0000000000..81e6cffc84 --- /dev/null +++ b/mteb/tasks/PairClassification/fas/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FarsTail import FarsTail + +__all__ = ["FarsTail"] diff --git a/mteb/tasks/PairClassification/hye/__init__.py b/mteb/tasks/PairClassification/hye/__init__.py index e69de29bb2..33ba14204e 100644 --- a/mteb/tasks/PairClassification/hye/__init__.py +++ b/mteb/tasks/PairClassification/hye/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .ArmenianParaphrasePC import ArmenianParaphrasePC + +__all__ = ["ArmenianParaphrasePC"] diff --git a/mteb/tasks/PairClassification/ind/__init__.py 
b/mteb/tasks/PairClassification/ind/__init__.py new file mode 100644 index 0000000000..73e2935997 --- /dev/null +++ b/mteb/tasks/PairClassification/ind/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .IndoNLI import IndoNLI + +__all__ = ["IndoNLI"] diff --git a/mteb/tasks/PairClassification/kor/__init__.py b/mteb/tasks/PairClassification/kor/__init__.py new file mode 100644 index 0000000000..83b58c5d97 --- /dev/null +++ b/mteb/tasks/PairClassification/kor/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .KlueNLI import KlueNLI + +__all__ = ["KlueNLI"] diff --git a/mteb/tasks/PairClassification/multilingual/__init__.py b/mteb/tasks/PairClassification/multilingual/__init__.py index e69de29bb2..a9ec6dc333 100644 --- a/mteb/tasks/PairClassification/multilingual/__init__.py +++ b/mteb/tasks/PairClassification/multilingual/__init__.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from .IndicXnliPairClassification import IndicXnliPairClassification +from .OpusparcusPC import OpusparcusPC +from .PawsXPairClassification import PawsXPairClassification +from .RTE3 import RTE3 +from .XNLI import XNLI, XNLIV2 +from .XStance import XStance + +__all__ = [ + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", +] diff --git a/mteb/tasks/PairClassification/pol/__init__.py b/mteb/tasks/PairClassification/pol/__init__.py index e69de29bb2..661977b294 100644 --- a/mteb/tasks/PairClassification/pol/__init__.py +++ b/mteb/tasks/PairClassification/pol/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PolishPC import CdscePC, PpcPC, PscPC, SickePLPC + +__all__ = ["CdscePC", "PpcPC", "PscPC", "SickePLPC"] diff --git a/mteb/tasks/PairClassification/por/__init__.py b/mteb/tasks/PairClassification/por/__init__.py new file mode 100644 index 0000000000..fa28ff584c --- /dev/null +++ b/mteb/tasks/PairClassification/por/__init__.py @@ -0,0 +1,6 @@ 
+from __future__ import annotations + +from .Assin2RTE import Assin2RTE +from .SickBrPC import SickBrPC + +__all__ = ["Assin2RTE", "SickBrPC"] diff --git a/mteb/tasks/PairClassification/rus/__init__.py b/mteb/tasks/PairClassification/rus/__init__.py index e69de29bb2..71fcf2dfca 100644 --- a/mteb/tasks/PairClassification/rus/__init__.py +++ b/mteb/tasks/PairClassification/rus/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TERRa import TERRa + +__all__ = ["TERRa"] diff --git a/mteb/tasks/PairClassification/zho/__init__.py b/mteb/tasks/PairClassification/zho/__init__.py index e69de29bb2..7ad9656532 100644 --- a/mteb/tasks/PairClassification/zho/__init__.py +++ b/mteb/tasks/PairClassification/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBPairClassification import Cmnli, Ocnli + +__all__ = ["Cmnli", "Ocnli"] diff --git a/mteb/tasks/Reranking/__init__.py b/mteb/tasks/Reranking/__init__.py index 18dbd53f43..497e2751b8 100644 --- a/mteb/tasks/Reranking/__init__.py +++ b/mteb/tasks/Reranking/__init__.py @@ -1,16 +1,35 @@ from __future__ import annotations -from .eng.AskUbuntuDupQuestions import * -from .eng.MindSmallReranking import * -from .eng.NevIR import * -from .eng.SciDocsReranking import * -from .eng.StackOverflowDupQuestions import * -from .eng.WebLINXCandidatesReranking import * -from .fra.AlloprofReranking import * -from .fra.SyntecReranking import * -from .jpn.MMarcoReranking import * -from .multilingual.ESCIReranking import * -from .multilingual.MIRACLReranking import * -from .multilingual.WikipediaRerankingMultilingual import * -from .rus.RuBQReranking import * -from .zho.CMTEBReranking import * +from .eng import ( + AskUbuntuDupQuestions, + MindSmallReranking, + NevIR, + SciDocsReranking, + StackOverflowDupQuestions, + WebLINXCandidatesReranking, +) +from .fra import AlloprofReranking, SyntecReranking +from .jpn import VoyageMMarcoReranking +from .multilingual import ESCIReranking, MIRACLReranking, 
WikipediaRerankingMultilingual +from .rus import RuBQReranking +from .zho import CMedQAv1, CMedQAv2, MMarcoReranking, T2Reranking + +__all__ = [ + "CMedQAv1", + "CMedQAv2", + "MMarcoReranking", + "T2Reranking", + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", + "VoyageMMarcoReranking", + "MIRACLReranking", + "ESCIReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "SyntecReranking", + "AlloprofReranking", +] diff --git a/mteb/tasks/Reranking/eng/__init__.py b/mteb/tasks/Reranking/eng/__init__.py index e69de29bb2..a9975a736d 100644 --- a/mteb/tasks/Reranking/eng/__init__.py +++ b/mteb/tasks/Reranking/eng/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .AskUbuntuDupQuestions import AskUbuntuDupQuestions +from .MindSmallReranking import MindSmallReranking +from .NevIR import NevIR +from .SciDocsReranking import SciDocsReranking +from .StackOverflowDupQuestions import StackOverflowDupQuestions +from .WebLINXCandidatesReranking import WebLINXCandidatesReranking + +__all__ = [ + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", +] diff --git a/mteb/tasks/Reranking/fra/__init__.py b/mteb/tasks/Reranking/fra/__init__.py index e69de29bb2..a9f6cc9d64 100644 --- a/mteb/tasks/Reranking/fra/__init__.py +++ b/mteb/tasks/Reranking/fra/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .AlloprofReranking import AlloprofReranking +from .SyntecReranking import SyntecReranking + +__all__ = ["SyntecReranking", "AlloprofReranking"] diff --git a/mteb/tasks/Reranking/jpn/__init__.py b/mteb/tasks/Reranking/jpn/__init__.py new file mode 100644 index 0000000000..bb4df4f804 --- /dev/null +++ b/mteb/tasks/Reranking/jpn/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .MMarcoReranking import VoyageMMarcoReranking + 
+__all__ = ["VoyageMMarcoReranking"] diff --git a/mteb/tasks/Reranking/multilingual/__init__.py b/mteb/tasks/Reranking/multilingual/__init__.py index e69de29bb2..67f1504333 100644 --- a/mteb/tasks/Reranking/multilingual/__init__.py +++ b/mteb/tasks/Reranking/multilingual/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .ESCIReranking import ESCIReranking +from .MIRACLReranking import MIRACLReranking +from .WikipediaRerankingMultilingual import WikipediaRerankingMultilingual + +__all__ = ["MIRACLReranking", "ESCIReranking", "WikipediaRerankingMultilingual"] diff --git a/mteb/tasks/Reranking/rus/__init__.py b/mteb/tasks/Reranking/rus/__init__.py index e69de29bb2..579e3727fa 100644 --- a/mteb/tasks/Reranking/rus/__init__.py +++ b/mteb/tasks/Reranking/rus/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RuBQReranking import RuBQReranking + +__all__ = ["RuBQReranking"] diff --git a/mteb/tasks/Reranking/zho/__init__.py b/mteb/tasks/Reranking/zho/__init__.py index e69de29bb2..b9be2bdc26 100644 --- a/mteb/tasks/Reranking/zho/__init__.py +++ b/mteb/tasks/Reranking/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBReranking import CMedQAv1, CMedQAv2, MMarcoReranking, T2Reranking + +__all__ = ["CMedQAv1", "CMedQAv2", "MMarcoReranking", "T2Reranking"] diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index ca41d4354f..859e8d3a49 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -1,147 +1,348 @@ from __future__ import annotations -from .ara.SadeemQuestionRetrieval import * -from .code.AppsRetrieval import * -from .code.CodeEditSearchRetrieval import * -from .code.CodeFeedbackMTRetrieval import * -from .code.CodeFeedbackSTRetrieval import * -from .code.CodeSearchNetCCRetrieval import * -from .code.CodeSearchNetRetrieval import * -from .code.CodeTransOceanContestRetrieval import * -from .code.CodeTransOceanDLRetrieval import * -from 
.code.COIRCodeSearchNetRetrieval import * -from .code.CosQARetrieval import * -from .code.StackOverflowQARetrieval import * -from .code.SyntheticText2SqlRetrieval import * -from .dan.DanFeverRetrieval import * -from .dan.TV2Nordretrieval import * -from .dan.TwitterHjerneRetrieval import * -from .deu.GerDaLIRRetrieval import * -from .deu.GerDaLIRSmallRetrieval import * -from .deu.GermanDPRRetrieval import * -from .deu.GermanGovServiceRetrieval import * -from .deu.GermanQuADRetrieval import * -from .deu.LegalQuADRetrieval import * -from .ell.GreekCivicsQA import * -from .eng.AILACasedocsRetrieval import * -from .eng.AILAStatutesRetrieval import * -from .eng.AlphaNLIRetrieval import * -from .eng.ARCChallengeRetrieval import * -from .eng.ArguAnaRetrieval import * -from .eng.BrightRetrieval import * -from .eng.ClimateFEVERRetrieval import * -from .eng.CQADupstackAndroidRetrieval import * -from .eng.CQADupstackEnglishRetrieval import * -from .eng.CQADupstackGamingRetrieval import * -from .eng.CQADupstackGisRetrieval import * -from .eng.CQADupstackMathematicaRetrieval import * -from .eng.CQADupstackPhysicsRetrieval import * -from .eng.CQADupstackProgrammersRetrieval import * -from .eng.CQADupstackStatsRetrieval import * -from .eng.CQADupstackTexRetrieval import * -from .eng.CQADupstackUnixRetrieval import * -from .eng.CQADupstackWebmastersRetrieval import * -from .eng.CQADupstackWordpressRetrieval import * -from .eng.DBPediaRetrieval import * -from .eng.FaithDialRetrieval import * -from .eng.FeedbackQARetrieval import * -from .eng.FEVERRetrieval import * -from .eng.FiQA2018Retrieval import * -from .eng.HagridRetrieval import * -from .eng.HellaSwagRetrieval import * -from .eng.HotpotQARetrieval import * -from .eng.LegalBenchConsumerContractsQARetrieval import * -from .eng.LegalBenchCorporateLobbyingRetrieval import * -from .eng.LegalSummarizationRetrieval import * -from .eng.LEMBNarrativeQARetrieval import * -from .eng.LEMBNeedleRetrieval import * -from 
.eng.LEMBPasskeyRetrieval import * -from .eng.LEMBQMSumRetrieval import * -from .eng.LEMBSummScreenFDRetrieval import * -from .eng.LEMBWikimQARetrieval import * -from .eng.LitSearchRetrieval import * -from .eng.MedicalQARetrieval import * -from .eng.MLQuestions import * -from .eng.MSMARCORetrieval import * -from .eng.MSMARCOv2Retrieval import * -from .eng.NarrativeQARetrieval import * -from .eng.NFCorpusRetrieval import * -from .eng.NQRetrieval import * -from .eng.PiqaRetrieval import * -from .eng.QuailRetrieval import * -from .eng.QuoraRetrieval import * -from .eng.RARbCodeRetrieval import * -from .eng.RARbMathRetrieval import * -from .eng.SCIDOCSRetrieval import * -from .eng.SciFactRetrieval import * -from .eng.SiqaRetrieval import * -from .eng.SpartQARetrieval import * -from .eng.TempReasonL1Retrieval import * -from .eng.TempReasonL2ContextRetrieval import * -from .eng.TempReasonL2FactRetrieval import * -from .eng.TempReasonL2PureRetrieval import * -from .eng.TempReasonL3ContextRetrieval import * -from .eng.TempReasonL3FactRetrieval import * -from .eng.TempReasonL3PureRetrieval import * -from .eng.TopiOCQARetrieval import * -from .eng.Touche2020Retrieval import * -from .eng.TRECCOVIDRetrieval import * -from .eng.WinoGrandeRetrieval import * -from .est.estqa import * -from .fra.AlloprofRetrieval import * -from .fra.BSARDRetrieval import * -from .fra.FQuADRetrieval import * -from .fra.SyntecRetrieval import * -from .hun.HunSum2 import * -from .jpn.JaGovFaqsRetrieval import * -from .jpn.JaqketRetrieval import * -from .jpn.JaQuADRetrieval import * -from .jpn.NLPJournalAbsIntroRetrieval import * -from .jpn.NLPJournalTitleAbsRetrieval import * -from .jpn.NLPJournalTitleIntroRetrieval import * -from .kat.GeorgianFAQRetrieval import * -from .kor.AutoRAGRetrieval import * -from .kor.KoStrategyQA import * -from .multilingual.BelebeleRetrieval import * -from .multilingual.CrossLingualSemanticDiscriminationWMT19 import * -from 
.multilingual.CrossLingualSemanticDiscriminationWMT21 import * -from .multilingual.CUREv1Retrieval import * -from .multilingual.IndicQARetrieval import * -from .multilingual.MintakaRetrieval import * -from .multilingual.MIRACLRetrieval import * -from .multilingual.MLQARetrieval import * -from .multilingual.MrTidyRetrieval import * -from .multilingual.MultiLongDocRetrieval import * -from .multilingual.NeuCLIR2022Retrieval import * -from .multilingual.NeuCLIR2023Retrieval import * -from .multilingual.PublicHealthQARetrieval import * -from .multilingual.StatcanDialogueDatasetRetrieval import * -from .multilingual.WikipediaRetrievalMultilingual import * -from .multilingual.XMarketRetrieval import * -from .multilingual.XPQARetrieval import * -from .multilingual.XQuADRetrieval import * -from .nob.norquad import * -from .nob.snl_retrieval import * -from .pol.ArguAnaPLRetrieval import * -from .pol.DBPediaPLRetrieval import * -from .pol.FiQAPLRetrieval import * -from .pol.HotpotQAPLRetrieval import * -from .pol.MSMARCOPLRetrieval import * -from .pol.NFCorpusPLRetrieval import * -from .pol.NQPLRetrieval import * -from .pol.QuoraPLRetrieval import * -from .pol.SCIDOCSPLRetrieval import * -from .pol.SciFactPLRetrieval import * -from .pol.TRECCOVIDPLRetrieval import * -from .rus.RiaNewsRetrieval import * -from .rus.RuBQRetrieval import * -from .slk.SKQuadRetrieval import * -from .slk.SlovakSumRetrieval import * -from .spa.SpanishPassageRetrievalS2P import * -from .spa.SpanishPassageRetrievalS2S import * -from .swe.SwednRetrieval import * -from .swe.SweFaqRetrieval import * -from .tur.TurHistQuad import * -from .vie.VieQuADRetrieval import * -from .zho.CMTEBRetrieval import * -from .zho.LeCaRDv2Retrieval import * +from .ara import SadeemQuestionRetrieval +from .code import ( + AppsRetrieval, + CodeEditSearchRetrieval, + CodeFeedbackMT, + CodeFeedbackST, + CodeSearchNetCCRetrieval, + CodeSearchNetRetrieval, + CodeTransOceanContestRetrieval, + CodeTransOceanDLRetrieval, + 
COIRCodeSearchNetRetrieval, + CosQARetrieval, + StackOverflowQARetrieval, + SyntheticText2SQLRetrieval, +) +from .dan import DanFever, DanFeverRetrieval, TV2Nordretrieval, TwitterHjerneRetrieval +from .deu import ( + GerDaLIR, + GerDaLIRSmall, + GermanDPR, + GermanGovServiceRetrieval, + GermanQuADRetrieval, + LegalQuAD, +) +from .ell import GreekCivicsQA +from .eng import ( + FEVER, + MSMARCO, + NQ, + PIQA, + SCIDOCS, + SIQA, + TRECCOVID, + AILACasedocs, + AILAStatutes, + AlphaNLI, + ARCChallenge, + ArguAna, + BrightRetrieval, + ClimateFEVER, + ClimateFEVERHardNegatives, + CQADupstackAndroidRetrieval, + CQADupstackEnglishRetrieval, + CQADupstackGamingRetrieval, + CQADupstackGisRetrieval, + CQADupstackMathematicaRetrieval, + CQADupstackPhysicsRetrieval, + CQADupstackProgrammersRetrieval, + CQADupstackStatsRetrieval, + CQADupstackTexRetrieval, + CQADupstackUnixRetrieval, + CQADupstackWebmastersRetrieval, + CQADupstackWordpressRetrieval, + DBPedia, + DBPediaHardNegatives, + FaithDialRetrieval, + FeedbackQARetrieval, + FEVERHardNegatives, + FiQA2018, + HagridRetrieval, + HellaSwag, + HotpotQA, + HotpotQAHardNegatives, + LegalBenchConsumerContractsQA, + LegalBenchCorporateLobbying, + LegalSummarization, + LEMBNarrativeQARetrieval, + LEMBNeedleRetrieval, + LEMBPasskeyRetrieval, + LEMBQMSumRetrieval, + LEMBSummScreenFDRetrieval, + LEMBWikimQARetrieval, + LitSearchRetrieval, + MedicalQARetrieval, + MLQuestionsRetrieval, + MSMARCOHardNegatives, + MSMARCOv2, + NarrativeQARetrieval, + NFCorpus, + NQHardNegatives, + Quail, + QuoraRetrieval, + QuoraRetrievalHardNegatives, + RARbCode, + RARbMath, + SciFact, + SpartQA, + TempReasonL1, + TempReasonL2Context, + TempReasonL2Fact, + TempReasonL2Pure, + TempReasonL3Context, + TempReasonL3Fact, + TempReasonL3Pure, + TopiOCQARetrieval, + TopiOCQARetrievalHardNegatives, + Touche2020, + Touche2020v3Retrieval, + WinoGrande, +) +from .est import EstQA +from .fra import AlloprofRetrieval, BSARDRetrieval, FQuADRetrieval, SyntecRetrieval +from 
.hun import HunSum2AbstractiveRetrieval +from .jpn import ( + JaGovFaqsRetrieval, + JaqketRetrieval, + JaQuADRetrieval, + NLPJournalAbsIntroRetrieval, + NLPJournalTitleAbsRetrieval, + NLPJournalTitleIntroRetrieval, +) +from .kat import GeorgianFAQRetrieval +from .kor import AutoRAGRetrieval, KoStrategyQA +from .multilingual import ( + BelebeleRetrieval, + CrossLingualSemanticDiscriminationWMT19, + CrossLingualSemanticDiscriminationWMT21, + CUREv1Retrieval, + IndicQARetrieval, + MintakaRetrieval, + MIRACLRetrieval, + MIRACLRetrievalHardNegatives, + MLQARetrieval, + MrTidyRetrieval, + MultiLongDocRetrieval, + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, + PublicHealthQARetrieval, + StatcanDialogueDatasetRetrieval, + WikipediaRetrievalMultilingual, + XMarket, + XPQARetrieval, + XQuADRetrieval, +) +from .nob import NorQuadRetrieval, SNLRetrieval +from .pol import ( + MSMARCOPL, + NQPL, + SCIDOCSPL, + TRECCOVIDPL, + ArguAnaPL, + DBPediaPL, + DBPediaPLHardNegatives, + FiQAPLRetrieval, + HotpotQAPL, + HotpotQAPLHardNegatives, + MSMARCOPLHardNegatives, + NFCorpusPL, + NQPLHardNegatives, + QuoraPLRetrieval, + QuoraPLRetrievalHardNegatives, + SciFactPL, +) +from .rus import RiaNewsRetrieval, RiaNewsRetrievalHardNegatives, RuBQRetrieval +from .slk import SKQuadRetrieval, SlovakSumRetrieval +from .spa import SpanishPassageRetrievalS2P, SpanishPassageRetrievalS2S +from .swe import SwednRetrieval, SweFaqRetrieval +from .tur import TurHistQuadRetrieval +from .vie import VieQuADRetrieval +from .zho import ( + CmedqaRetrieval, + CovidRetrieval, + DuRetrieval, + EcomRetrieval, + LeCaRDv2, + MedicalRetrieval, + MMarcoRetrieval, + T2Retrieval, + VideoRetrieval, +) + +__all__ = [ + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", + "SpanishPassageRetrievalS2S", + "SpanishPassageRetrievalS2P", + 
"MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + "QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", + "GeorgianFAQRetrieval", + "SwednRetrieval", + "SweFaqRetrieval", + "SlovakSumRetrieval", + "SKQuadRetrieval", + "SNLRetrieval", + "NorQuadRetrieval", + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", + "SadeemQuestionRetrieval", + "TurHistQuadRetrieval", + "VieQuADRetrieval", + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", + "EstQA", + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", + "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + "CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + "MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + "CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + 
"RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", + "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", + "HunSum2AbstractiveRetrieval", + "AutoRAGRetrieval", + "KoStrategyQA", + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + "NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + "COIRCodeSearchNetRetrieval", + "RiaNewsRetrieval", + "RiaNewsRetrievalHardNegatives", + "RuBQRetrieval", + "GreekCivicsQA", + "AlloprofRetrieval", + "BSARDRetrieval", + "SyntecRetrieval", + "FQuADRetrieval", +] diff --git a/mteb/tasks/Retrieval/ara/__init__.py b/mteb/tasks/Retrieval/ara/__init__.py index e69de29bb2..a015313def 100644 --- a/mteb/tasks/Retrieval/ara/__init__.py +++ b/mteb/tasks/Retrieval/ara/__init__.py @@ -0,0 +1,5 @@ +from __future__ 
import annotations + +from .SadeemQuestionRetrieval import SadeemQuestionRetrieval + +__all__ = ["SadeemQuestionRetrieval"] diff --git a/mteb/tasks/Retrieval/code/__init__.py b/mteb/tasks/Retrieval/code/__init__.py index e69de29bb2..bc2a2fad79 100644 --- a/mteb/tasks/Retrieval/code/__init__.py +++ b/mteb/tasks/Retrieval/code/__init__.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from .AppsRetrieval import AppsRetrieval +from .CodeEditSearchRetrieval import CodeEditSearchRetrieval +from .CodeFeedbackMTRetrieval import CodeFeedbackMT +from .CodeFeedbackSTRetrieval import CodeFeedbackST +from .CodeSearchNetCCRetrieval import CodeSearchNetCCRetrieval +from .CodeSearchNetRetrieval import CodeSearchNetRetrieval +from .CodeTransOceanContestRetrieval import CodeTransOceanContestRetrieval +from .CodeTransOceanDLRetrieval import CodeTransOceanDLRetrieval +from .COIRCodeSearchNetRetrieval import COIRCodeSearchNetRetrieval +from .CosQARetrieval import CosQARetrieval +from .StackOverflowQARetrieval import StackOverflowQARetrieval +from .SyntheticText2SqlRetrieval import SyntheticText2SQLRetrieval + +__all__ = [ + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + "COIRCodeSearchNetRetrieval", +] diff --git a/mteb/tasks/Retrieval/dan/__init__.py b/mteb/tasks/Retrieval/dan/__init__.py index e69de29bb2..c1f49a1368 100644 --- a/mteb/tasks/Retrieval/dan/__init__.py +++ b/mteb/tasks/Retrieval/dan/__init__.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from .DanFeverRetrieval import DanFever, DanFeverRetrieval +from .TV2Nordretrieval import TV2Nordretrieval +from .TwitterHjerneRetrieval import TwitterHjerneRetrieval + +__all__ = [ + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", +] 
diff --git a/mteb/tasks/Retrieval/deu/__init__.py b/mteb/tasks/Retrieval/deu/__init__.py index e69de29bb2..886efbecc9 100644 --- a/mteb/tasks/Retrieval/deu/__init__.py +++ b/mteb/tasks/Retrieval/deu/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .GerDaLIRRetrieval import GerDaLIR +from .GerDaLIRSmallRetrieval import GerDaLIRSmall +from .GermanDPRRetrieval import GermanDPR +from .GermanGovServiceRetrieval import GermanGovServiceRetrieval +from .GermanQuADRetrieval import GermanQuADRetrieval +from .LegalQuADRetrieval import LegalQuAD + +__all__ = [ + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", +] diff --git a/mteb/tasks/Retrieval/ell/__init__.py b/mteb/tasks/Retrieval/ell/__init__.py index e69de29bb2..ac7ec59e4f 100644 --- a/mteb/tasks/Retrieval/ell/__init__.py +++ b/mteb/tasks/Retrieval/ell/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GreekCivicsQA import GreekCivicsQA + +__all__ = ["GreekCivicsQA"] diff --git a/mteb/tasks/Retrieval/eng/__init__.py b/mteb/tasks/Retrieval/eng/__init__.py index e69de29bb2..47e2498709 100644 --- a/mteb/tasks/Retrieval/eng/__init__.py +++ b/mteb/tasks/Retrieval/eng/__init__.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from .AILACasedocsRetrieval import AILACasedocs +from .AILAStatutesRetrieval import AILAStatutes +from .AlphaNLIRetrieval import AlphaNLI +from .ARCChallengeRetrieval import ARCChallenge +from .ArguAnaRetrieval import ArguAna +from .BrightRetrieval import BrightRetrieval +from .ClimateFEVERRetrieval import ClimateFEVER, ClimateFEVERHardNegatives +from .CQADupstackAndroidRetrieval import CQADupstackAndroidRetrieval +from .CQADupstackEnglishRetrieval import CQADupstackEnglishRetrieval +from .CQADupstackGamingRetrieval import CQADupstackGamingRetrieval +from .CQADupstackGisRetrieval import CQADupstackGisRetrieval +from .CQADupstackMathematicaRetrieval import 
CQADupstackMathematicaRetrieval +from .CQADupstackPhysicsRetrieval import CQADupstackPhysicsRetrieval +from .CQADupstackProgrammersRetrieval import CQADupstackProgrammersRetrieval +from .CQADupstackStatsRetrieval import CQADupstackStatsRetrieval +from .CQADupstackTexRetrieval import CQADupstackTexRetrieval +from .CQADupstackUnixRetrieval import CQADupstackUnixRetrieval +from .CQADupstackWebmastersRetrieval import CQADupstackWebmastersRetrieval +from .CQADupstackWordpressRetrieval import CQADupstackWordpressRetrieval +from .DBPediaRetrieval import DBPedia, DBPediaHardNegatives +from .FaithDialRetrieval import FaithDialRetrieval +from .FeedbackQARetrieval import FeedbackQARetrieval +from .FEVERRetrieval import FEVER, FEVERHardNegatives +from .FiQA2018Retrieval import FiQA2018 +from .HagridRetrieval import HagridRetrieval +from .HellaSwagRetrieval import HellaSwag +from .HotpotQARetrieval import HotpotQA, HotpotQAHardNegatives +from .LegalBenchConsumerContractsQARetrieval import LegalBenchConsumerContractsQA +from .LegalBenchCorporateLobbyingRetrieval import LegalBenchCorporateLobbying +from .LegalSummarizationRetrieval import LegalSummarization +from .LEMBNarrativeQARetrieval import LEMBNarrativeQARetrieval +from .LEMBNeedleRetrieval import LEMBNeedleRetrieval +from .LEMBPasskeyRetrieval import LEMBPasskeyRetrieval +from .LEMBQMSumRetrieval import LEMBQMSumRetrieval +from .LEMBSummScreenFDRetrieval import LEMBSummScreenFDRetrieval +from .LEMBWikimQARetrieval import LEMBWikimQARetrieval +from .LitSearchRetrieval import LitSearchRetrieval +from .MedicalQARetrieval import MedicalQARetrieval +from .MLQuestions import MLQuestionsRetrieval +from .MSMARCORetrieval import MSMARCO, MSMARCOHardNegatives +from .MSMARCOv2Retrieval import MSMARCOv2 +from .NarrativeQARetrieval import NarrativeQARetrieval +from .NFCorpusRetrieval import NFCorpus +from .NQRetrieval import NQ, NQHardNegatives +from .PiqaRetrieval import PIQA +from .QuailRetrieval import Quail +from .QuoraRetrieval 
import QuoraRetrieval, QuoraRetrievalHardNegatives +from .RARbCodeRetrieval import RARbCode +from .RARbMathRetrieval import RARbMath +from .SCIDOCSRetrieval import SCIDOCS +from .SciFactRetrieval import SciFact +from .SiqaRetrieval import SIQA +from .SpartQARetrieval import SpartQA +from .TempReasonL1Retrieval import TempReasonL1 +from .TempReasonL2ContextRetrieval import TempReasonL2Context +from .TempReasonL2FactRetrieval import TempReasonL2Fact +from .TempReasonL2PureRetrieval import TempReasonL2Pure +from .TempReasonL3ContextRetrieval import TempReasonL3Context +from .TempReasonL3FactRetrieval import TempReasonL3Fact +from .TempReasonL3PureRetrieval import TempReasonL3Pure +from .TopiOCQARetrieval import TopiOCQARetrieval, TopiOCQARetrievalHardNegatives +from .Touche2020Retrieval import Touche2020, Touche2020v3Retrieval +from .TRECCOVIDRetrieval import TRECCOVID +from .WinoGrandeRetrieval import WinoGrande + +__all__ = [ + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", + "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + "CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + "MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + 
"CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + "RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", + "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", +] diff --git a/mteb/tasks/Retrieval/est/__init__.py b/mteb/tasks/Retrieval/est/__init__.py index e69de29bb2..45c701f6dc 100644 --- a/mteb/tasks/Retrieval/est/__init__.py +++ b/mteb/tasks/Retrieval/est/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .estqa import EstQA + +__all__ = ["EstQA"] diff --git a/mteb/tasks/Retrieval/fra/__init__.py b/mteb/tasks/Retrieval/fra/__init__.py index e69de29bb2..b3441c8122 100644 --- a/mteb/tasks/Retrieval/fra/__init__.py +++ b/mteb/tasks/Retrieval/fra/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .AlloprofRetrieval import AlloprofRetrieval +from .BSARDRetrieval import BSARDRetrieval +from .FQuADRetrieval import FQuADRetrieval +from .SyntecRetrieval import SyntecRetrieval + +__all__ = ["AlloprofRetrieval", "BSARDRetrieval", "SyntecRetrieval", "FQuADRetrieval"] diff --git a/mteb/tasks/Retrieval/hun/__init__.py b/mteb/tasks/Retrieval/hun/__init__.py new file mode 100644 index 0000000000..bd36e67f46 --- /dev/null +++ b/mteb/tasks/Retrieval/hun/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .HunSum2 import HunSum2AbstractiveRetrieval + +__all__ = ["HunSum2AbstractiveRetrieval"] diff --git a/mteb/tasks/Retrieval/jpn/__init__.py b/mteb/tasks/Retrieval/jpn/__init__.py new file mode 100644 index 0000000000..1296cb0a84 --- /dev/null +++ 
b/mteb/tasks/Retrieval/jpn/__init__.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from .JaGovFaqsRetrieval import JaGovFaqsRetrieval +from .JaqketRetrieval import JaqketRetrieval +from .JaQuADRetrieval import JaQuADRetrieval +from .NLPJournalAbsIntroRetrieval import NLPJournalAbsIntroRetrieval +from .NLPJournalTitleAbsRetrieval import NLPJournalTitleAbsRetrieval +from .NLPJournalTitleIntroRetrieval import NLPJournalTitleIntroRetrieval + +__all__ = [ + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", +] diff --git a/mteb/tasks/Retrieval/kat/__init__.py b/mteb/tasks/Retrieval/kat/__init__.py index e69de29bb2..026c6dc22b 100644 --- a/mteb/tasks/Retrieval/kat/__init__.py +++ b/mteb/tasks/Retrieval/kat/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GeorgianFAQRetrieval import GeorgianFAQRetrieval + +__all__ = ["GeorgianFAQRetrieval"] diff --git a/mteb/tasks/Retrieval/kor/__init__.py b/mteb/tasks/Retrieval/kor/__init__.py index e69de29bb2..fa553802be 100644 --- a/mteb/tasks/Retrieval/kor/__init__.py +++ b/mteb/tasks/Retrieval/kor/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .AutoRAGRetrieval import AutoRAGRetrieval +from .KoStrategyQA import KoStrategyQA + +__all__ = ["AutoRAGRetrieval", "KoStrategyQA"] diff --git a/mteb/tasks/Retrieval/multilingual/__init__.py b/mteb/tasks/Retrieval/multilingual/__init__.py index e69de29bb2..7a2c850ad3 100644 --- a/mteb/tasks/Retrieval/multilingual/__init__.py +++ b/mteb/tasks/Retrieval/multilingual/__init__.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from .BelebeleRetrieval import BelebeleRetrieval +from .CrossLingualSemanticDiscriminationWMT19 import ( + CrossLingualSemanticDiscriminationWMT19, +) +from .CrossLingualSemanticDiscriminationWMT21 import ( + CrossLingualSemanticDiscriminationWMT21, +) +from .CUREv1Retrieval import 
CUREv1Retrieval +from .IndicQARetrieval import IndicQARetrieval +from .MintakaRetrieval import MintakaRetrieval +from .MIRACLRetrieval import MIRACLRetrieval, MIRACLRetrievalHardNegatives +from .MLQARetrieval import MLQARetrieval +from .MrTidyRetrieval import MrTidyRetrieval +from .MultiLongDocRetrieval import MultiLongDocRetrieval +from .NeuCLIR2022Retrieval import ( + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, +) +from .NeuCLIR2023Retrieval import ( + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, +) +from .PublicHealthQARetrieval import PublicHealthQARetrieval +from .StatcanDialogueDatasetRetrieval import StatcanDialogueDatasetRetrieval +from .WikipediaRetrievalMultilingual import WikipediaRetrievalMultilingual +from .XMarketRetrieval import XMarket +from .XPQARetrieval import XPQARetrieval +from .XQuADRetrieval import XQuADRetrieval + +__all__ = [ + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + "NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", +] diff --git a/mteb/tasks/Retrieval/nob/__init__.py b/mteb/tasks/Retrieval/nob/__init__.py index e69de29bb2..5429ec3c91 100644 --- a/mteb/tasks/Retrieval/nob/__init__.py +++ b/mteb/tasks/Retrieval/nob/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .norquad import NorQuadRetrieval +from .snl_retrieval import SNLRetrieval + +__all__ = ["SNLRetrieval", "NorQuadRetrieval"] diff --git a/mteb/tasks/Retrieval/pol/__init__.py b/mteb/tasks/Retrieval/pol/__init__.py index e69de29bb2..47579b9ae8 100644 
--- a/mteb/tasks/Retrieval/pol/__init__.py +++ b/mteb/tasks/Retrieval/pol/__init__.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from .ArguAnaPLRetrieval import ArguAnaPL +from .DBPediaPLRetrieval import DBPediaPL, DBPediaPLHardNegatives +from .FiQAPLRetrieval import FiQAPLRetrieval +from .HotpotQAPLRetrieval import HotpotQAPL, HotpotQAPLHardNegatives +from .MSMARCOPLRetrieval import MSMARCOPL, MSMARCOPLHardNegatives +from .NFCorpusPLRetrieval import NFCorpusPL +from .NQPLRetrieval import NQPL, NQPLHardNegatives +from .QuoraPLRetrieval import QuoraPLRetrieval, QuoraPLRetrievalHardNegatives +from .SCIDOCSPLRetrieval import SCIDOCSPL +from .SciFactPLRetrieval import SciFactPL +from .TRECCOVIDPLRetrieval import TRECCOVIDPL + +__all__ = [ + "MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + "QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", +] diff --git a/mteb/tasks/Retrieval/rus/__init__.py b/mteb/tasks/Retrieval/rus/__init__.py index e69de29bb2..1f1aedfe79 100644 --- a/mteb/tasks/Retrieval/rus/__init__.py +++ b/mteb/tasks/Retrieval/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .RiaNewsRetrieval import RiaNewsRetrieval, RiaNewsRetrievalHardNegatives +from .RuBQRetrieval import RuBQRetrieval + +__all__ = ["RiaNewsRetrieval", "RiaNewsRetrievalHardNegatives", "RuBQRetrieval"] diff --git a/mteb/tasks/Retrieval/slk/__init__.py b/mteb/tasks/Retrieval/slk/__init__.py index e69de29bb2..2b24947f0d 100644 --- a/mteb/tasks/Retrieval/slk/__init__.py +++ b/mteb/tasks/Retrieval/slk/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SKQuadRetrieval import SKQuadRetrieval +from .SlovakSumRetrieval import SlovakSumRetrieval + +__all__ = ["SlovakSumRetrieval", "SKQuadRetrieval"] diff --git 
a/mteb/tasks/Retrieval/spa/__init__.py b/mteb/tasks/Retrieval/spa/__init__.py index e69de29bb2..39f47471aa 100644 --- a/mteb/tasks/Retrieval/spa/__init__.py +++ b/mteb/tasks/Retrieval/spa/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SpanishPassageRetrievalS2P import SpanishPassageRetrievalS2P +from .SpanishPassageRetrievalS2S import SpanishPassageRetrievalS2S + +__all__ = ["SpanishPassageRetrievalS2S", "SpanishPassageRetrievalS2P"] diff --git a/mteb/tasks/Retrieval/swe/__init__.py b/mteb/tasks/Retrieval/swe/__init__.py index e69de29bb2..3478b1dc03 100644 --- a/mteb/tasks/Retrieval/swe/__init__.py +++ b/mteb/tasks/Retrieval/swe/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .SwednRetrieval import SwednRetrieval +from .SweFaqRetrieval import SweFaqRetrieval + +__all__ = ["SwednRetrieval", "SweFaqRetrieval"] diff --git a/mteb/tasks/Retrieval/tur/__init__.py b/mteb/tasks/Retrieval/tur/__init__.py index e69de29bb2..434608ee7c 100644 --- a/mteb/tasks/Retrieval/tur/__init__.py +++ b/mteb/tasks/Retrieval/tur/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .TurHistQuad import TurHistQuadRetrieval + +__all__ = ["TurHistQuadRetrieval"] diff --git a/mteb/tasks/Retrieval/vie/__init__.py b/mteb/tasks/Retrieval/vie/__init__.py index e69de29bb2..07a2d891c5 100644 --- a/mteb/tasks/Retrieval/vie/__init__.py +++ b/mteb/tasks/Retrieval/vie/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .VieQuADRetrieval import VieQuADRetrieval + +__all__ = ["VieQuADRetrieval"] diff --git a/mteb/tasks/Retrieval/zho/__init__.py b/mteb/tasks/Retrieval/zho/__init__.py index e69de29bb2..c4f399e542 100644 --- a/mteb/tasks/Retrieval/zho/__init__.py +++ b/mteb/tasks/Retrieval/zho/__init__.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from .CMTEBRetrieval import ( + CmedqaRetrieval, + CovidRetrieval, + DuRetrieval, + EcomRetrieval, + MedicalRetrieval, + MMarcoRetrieval, + T2Retrieval, + 
VideoRetrieval, +) +from .LeCaRDv2Retrieval import LeCaRDv2 + +__all__ = [ + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", +] diff --git a/mteb/tasks/STS/__init__.py b/mteb/tasks/STS/__init__.py index b61b79b293..34be948cba 100644 --- a/mteb/tasks/STS/__init__.py +++ b/mteb/tasks/STS/__init__.py @@ -1,31 +1,72 @@ from __future__ import annotations -from .deu.GermanSTSBenchmarkSTS import * -from .eng.BiossesSTS import * -from .eng.SickrSTS import * -from .eng.STS12STS import * -from .eng.STS13STS import * -from .eng.STS14STS import * -from .eng.STS15STS import * -from .eng.STS16STS import * -from .eng.STSBenchmarkSTS import * -from .fao.FaroeseSTS import * -from .fin.FinParaSTS import * -from .fra.SickFrSTS import * -from .jpn.JSICK import * -from .jpn.JSTS import * -from .kor.KlueSTS import * -from .kor.KorSTS import * -from .multilingual.IndicCrosslingualSTS import * -from .multilingual.SemRel24STS import * -from .multilingual.STS17CrosslingualSTS import * -from .multilingual.STS22CrosslingualSTS import * -from .multilingual.STSBenchmarkMultilingualSTS import * -from .pol.PolishSTS import * -from .por.Assin2STS import * -from .por.SickBrSTS import * -from .ron.RonSTS import * -from .rus.RUParaPhraserSTS import * -from .rus.RuSTSBenchmarkSTS import * -from .spa.STSES import * -from .zho.CMTEBSTS import * +from .deu import GermanSTSBenchmarkSTS +from .eng import ( + STS12STS, + STS13STS, + STS14STS, + STS15STS, + STS16STS, + BiossesSTS, + SickrSTS, + STSBenchmarkSTS, +) +from .fao import FaroeseSTS +from .fin import FinParaSTS +from .fra import SickFrSTS +from .jpn import JSICK, JSTS +from .kor import KlueSTS, KorSTS +from .multilingual import ( + IndicCrosslingualSTS, + SemRel24STS, + STS17Crosslingual, + STS22CrosslingualSTS, + STS22CrosslingualSTSv2, + STSBenchmarkMultilingualSTS, +) +from .pol import CdscrSTS, SickrPLSTS +from .por import 
Assin2STS, SickBrSTS +from .ron import RonSTS +from .rus import RUParaPhraserSTS, RuSTSBenchmarkSTS +from .spa import STSES +from .zho import AFQMC, ATEC, BQ, LCQMC, PAWSX, QBQTC, STSB + +__all__ = [ + "AFQMC", + "ATEC", + "BQ", + "LCQMC", + "PAWSX", + "QBQTC", + "STSB", + "Assin2STS", + "SickBrSTS", + "STSES", + "CdscrSTS", + "SickrPLSTS", + "FinParaSTS", + "GermanSTSBenchmarkSTS", + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", + "FaroeseSTS", + "JSICK", + "JSTS", + "RonSTS", + "KorSTS", + "KlueSTS", + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", + "RUParaPhraserSTS", + "RuSTSBenchmarkSTS", + "SickFrSTS", +] diff --git a/mteb/tasks/STS/deu/__init__.py b/mteb/tasks/STS/deu/__init__.py index e69de29bb2..0e996fbcbb 100644 --- a/mteb/tasks/STS/deu/__init__.py +++ b/mteb/tasks/STS/deu/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .GermanSTSBenchmarkSTS import GermanSTSBenchmarkSTS + +__all__ = ["GermanSTSBenchmarkSTS"] diff --git a/mteb/tasks/STS/eng/__init__.py b/mteb/tasks/STS/eng/__init__.py index e69de29bb2..fbf76028d8 100644 --- a/mteb/tasks/STS/eng/__init__.py +++ b/mteb/tasks/STS/eng/__init__.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from .BiossesSTS import BiossesSTS +from .SickrSTS import SickrSTS +from .STS12STS import STS12STS +from .STS13STS import STS13STS +from .STS14STS import STS14STS +from .STS15STS import STS15STS +from .STS16STS import STS16STS +from .STSBenchmarkSTS import STSBenchmarkSTS + +__all__ = [ + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", +] diff --git a/mteb/tasks/STS/fao/__init__.py b/mteb/tasks/STS/fao/__init__.py new file mode 100644 index 0000000000..1a53420b36 --- /dev/null +++ b/mteb/tasks/STS/fao/__init__.py @@ -0,0 +1,5 @@ +from 
__future__ import annotations + +from .FaroeseSTS import FaroeseSTS + +__all__ = ["FaroeseSTS"] diff --git a/mteb/tasks/STS/fin/__init__.py b/mteb/tasks/STS/fin/__init__.py index e69de29bb2..6c142b6f3d 100644 --- a/mteb/tasks/STS/fin/__init__.py +++ b/mteb/tasks/STS/fin/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .FinParaSTS import FinParaSTS + +__all__ = ["FinParaSTS"] diff --git a/mteb/tasks/STS/fra/__init__.py b/mteb/tasks/STS/fra/__init__.py index e69de29bb2..01dd563a0f 100644 --- a/mteb/tasks/STS/fra/__init__.py +++ b/mteb/tasks/STS/fra/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SickFrSTS import SickFrSTS + +__all__ = ["SickFrSTS"] diff --git a/mteb/tasks/STS/jpn/__init__.py b/mteb/tasks/STS/jpn/__init__.py new file mode 100644 index 0000000000..61734f3c03 --- /dev/null +++ b/mteb/tasks/STS/jpn/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .JSICK import JSICK +from .JSTS import JSTS + +__all__ = ["JSICK", "JSTS"] diff --git a/mteb/tasks/STS/kor/__init__.py b/mteb/tasks/STS/kor/__init__.py index e69de29bb2..187f787ea3 100644 --- a/mteb/tasks/STS/kor/__init__.py +++ b/mteb/tasks/STS/kor/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .KlueSTS import KlueSTS +from .KorSTS import KorSTS + +__all__ = ["KorSTS", "KlueSTS"] diff --git a/mteb/tasks/STS/multilingual/__init__.py b/mteb/tasks/STS/multilingual/__init__.py index e69de29bb2..e5aa138e14 100644 --- a/mteb/tasks/STS/multilingual/__init__.py +++ b/mteb/tasks/STS/multilingual/__init__.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from .IndicCrosslingualSTS import IndicCrosslingualSTS +from .SemRel24STS import SemRel24STS +from .STS17CrosslingualSTS import STS17Crosslingual +from .STS22CrosslingualSTS import STS22CrosslingualSTS, STS22CrosslingualSTSv2 +from .STSBenchmarkMultilingualSTS import STSBenchmarkMultilingualSTS + +__all__ = [ + "IndicCrosslingualSTS", + "SemRel24STS", + 
"STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", +] diff --git a/mteb/tasks/STS/pol/__init__.py b/mteb/tasks/STS/pol/__init__.py index e69de29bb2..b9975c7b4f 100644 --- a/mteb/tasks/STS/pol/__init__.py +++ b/mteb/tasks/STS/pol/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .PolishSTS import CdscrSTS, SickrPLSTS + +__all__ = ["CdscrSTS", "SickrPLSTS"] diff --git a/mteb/tasks/STS/por/__init__.py b/mteb/tasks/STS/por/__init__.py new file mode 100644 index 0000000000..bc6c92f1ea --- /dev/null +++ b/mteb/tasks/STS/por/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .Assin2STS import Assin2STS +from .SickBrSTS import SickBrSTS + +__all__ = ["Assin2STS", "SickBrSTS"] diff --git a/mteb/tasks/STS/ron/__init__.py b/mteb/tasks/STS/ron/__init__.py new file mode 100644 index 0000000000..cae594b55c --- /dev/null +++ b/mteb/tasks/STS/ron/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .RonSTS import RonSTS + +__all__ = ["RonSTS"] diff --git a/mteb/tasks/STS/rus/__init__.py b/mteb/tasks/STS/rus/__init__.py new file mode 100644 index 0000000000..9d778719f0 --- /dev/null +++ b/mteb/tasks/STS/rus/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .RUParaPhraserSTS import RUParaPhraserSTS +from .RuSTSBenchmarkSTS import RuSTSBenchmarkSTS + +__all__ = ["RUParaPhraserSTS", "RuSTSBenchmarkSTS"] diff --git a/mteb/tasks/STS/spa/__init__.py b/mteb/tasks/STS/spa/__init__.py index e69de29bb2..90ac2c3e75 100644 --- a/mteb/tasks/STS/spa/__init__.py +++ b/mteb/tasks/STS/spa/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .STSES import STSES + +__all__ = ["STSES"] diff --git a/mteb/tasks/STS/zho/__init__.py b/mteb/tasks/STS/zho/__init__.py index e69de29bb2..222d11ea6e 100644 --- a/mteb/tasks/STS/zho/__init__.py +++ b/mteb/tasks/STS/zho/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .CMTEBSTS import 
AFQMC, ATEC, BQ, LCQMC, PAWSX, QBQTC, STSB + +__all__ = ["AFQMC", "ATEC", "BQ", "LCQMC", "PAWSX", "QBQTC", "STSB"] diff --git a/mteb/tasks/SpeedTask/__init__.py b/mteb/tasks/SpeedTask/__init__.py index 5e9d2ce9bb..5ef332ae20 100644 --- a/mteb/tasks/SpeedTask/__init__.py +++ b/mteb/tasks/SpeedTask/__init__.py @@ -1,4 +1,6 @@ from __future__ import annotations -from .CPUSpeedTask import * -from .GPUSpeedTask import * +from .CPUSpeedTask import CPUSpeedTask +from .GPUSpeedTask import GPUSpeedTask + +__all__ = ["GPUSpeedTask", "CPUSpeedTask"] diff --git a/mteb/tasks/Summarization/__init__.py b/mteb/tasks/Summarization/__init__.py index 51c69766fe..0b8a424167 100644 --- a/mteb/tasks/Summarization/__init__.py +++ b/mteb/tasks/Summarization/__init__.py @@ -1,4 +1,11 @@ -from __future__ import annotations - -from .eng.SummEvalSummarization import * -from .fra.SummEvalFrSummarization import * +from __future__ import annotations + +from .eng import SummEvalSummarization, SummEvalSummarizationv2 +from .fra import SummEvalFrSummarization, SummEvalFrSummarizationv2 + +__all__ = [ + "SummEvalSummarization", + "SummEvalSummarizationv2", + "SummEvalFrSummarization", + "SummEvalFrSummarizationv2", +] diff --git a/mteb/tasks/Summarization/eng/__init__.py b/mteb/tasks/Summarization/eng/__init__.py index e69de29bb2..68ea2d1c73 100644 --- a/mteb/tasks/Summarization/eng/__init__.py +++ b/mteb/tasks/Summarization/eng/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SummEvalSummarization import SummEvalSummarization, SummEvalSummarizationv2 + +__all__ = ["SummEvalSummarization", "SummEvalSummarizationv2"] diff --git a/mteb/tasks/Summarization/fra/__init__.py b/mteb/tasks/Summarization/fra/__init__.py index e69de29bb2..8b8066ae4a 100644 --- a/mteb/tasks/Summarization/fra/__init__.py +++ b/mteb/tasks/Summarization/fra/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from .SummEvalFrSummarization import SummEvalFrSummarization, 
SummEvalFrSummarizationv2 + +__all__ = ["SummEvalFrSummarization", "SummEvalFrSummarizationv2"] diff --git a/mteb/tasks/__init__.py b/mteb/tasks/__init__.py index 72c357606f..6e09541e10 100644 --- a/mteb/tasks/__init__.py +++ b/mteb/tasks/__init__.py @@ -1,14 +1,1296 @@ from __future__ import annotations -from .BitextMining import * -from .Classification import * -from .Clustering import * -from .InstructionReranking import * -from .InstructionRetrieval import * -from .MultiLabelClassification import * -from .PairClassification import * -from .Reranking import * -from .Retrieval import * -from .SpeedTask import * -from .STS import * -from .Summarization import * +from .BitextMining import ( + BibleNLPBitextMining, + BornholmBitextMining, + BUCCBitextMining, + BUCCBitextMiningFast, + DiaBLaBitextMining, + FloresBitextMining, + IN22ConvBitextMining, + IN22GenBitextMining, + IndicGenBenchFloresBitextMining, + IWSLT2017BitextMining, + LinceMTBitextMining, + NollySentiBitextMining, + NorwegianCourtsBitextMining, + NTREXBitextMining, + NusaTranslationBitextMining, + NusaXBitextMining, + PhincBitextMining, + RomaTalesBitextMining, + SRNCorpusBitextMining, + TatoebaBitextMining, + TbilisiCityHallBitextMining, + VieMedEVBitextMining, +) +from .Classification import ( + AJGT, + AfriSentiClassification, + AfriSentiLangClassification, + AllegroReviewsClassification, + AmazonCounterfactualClassification, + AmazonPolarityClassification, + AmazonReviewsClassification, + AngryTweetsClassification, + ArxivClassification, + Banking77Classification, + BengaliDocumentClassification, + BengaliHateSpeechClassification, + BengaliSentimentAnalysis, + BulgarianStoreReviewSentimentClassfication, + CanadaTaxCourtOutcomesLegalBenchClassification, + CataloniaTweetClassification, + CbdClassification, + ContractNLIConfidentialityOfAgreementLegalBenchClassification, + ContractNLIExplicitIdentificationLegalBenchClassification, + 
ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification, + ContractNLILimitedUseLegalBenchClassification, + ContractNLINoLicensingLegalBenchClassification, + ContractNLINoticeOnCompelledDisclosureLegalBenchClassification, + ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification, + ContractNLIPermissibleCopyLegalBenchClassification, + ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification, + ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification, + ContractNLIReturnOfConfidentialInformationLegalBenchClassification, + ContractNLISharingWithEmployeesLegalBenchClassification, + ContractNLISharingWithThirdPartiesLegalBenchClassification, + ContractNLISurvivalOfObligationsLegalBenchClassification, + CorporateLobbyingLegalBenchClassification, + CSFDCZMovieReviewSentimentClassification, + CSFDSKMovieReviewSentimentClassification, + CUADAffiliateLicenseLicenseeLegalBenchClassification, + CUADAffiliateLicenseLicensorLegalBenchClassification, + CUADAntiAssignmentLegalBenchClassification, + CUADAuditRightsLegalBenchClassification, + CUADCapOnLiabilityLegalBenchClassification, + CUADChangeOfControlLegalBenchClassification, + CUADCompetitiveRestrictionExceptionLegalBenchClassification, + CUADCovenantNotToSueLegalBenchClassification, + CUADEffectiveDateLegalBenchClassification, + CUADExclusivityLegalBenchClassification, + CUADExpirationDateLegalBenchClassification, + CUADGoverningLawLegalBenchClassification, + CUADInsuranceLegalBenchClassification, + CUADIPOwnershipAssignmentLegalBenchClassification, + CUADIrrevocableOrPerpetualLicenseLegalBenchClassification, + CUADJointIPOwnershipLegalBenchClassification, + CUADLicenseGrantLegalBenchClassification, + CUADLiquidatedDamagesLegalBenchClassification, + CUADMinimumCommitmentLegalBenchClassification, + CUADMostFavoredNationLegalBenchClassification, + CUADNonCompeteLegalBenchClassification, + CUADNonDisparagementLegalBenchClassification, + 
CUADNonTransferableLicenseLegalBenchClassification, + CUADNoSolicitOfCustomersLegalBenchClassification, + CUADNoSolicitOfEmployeesLegalBenchClassification, + CUADNoticePeriodToTerminateRenewalLegalBenchClassification, + CUADPostTerminationServicesLegalBenchClassification, + CUADPriceRestrictionsLegalBenchClassification, + CUADRenewalTermLegalBenchClassification, + CUADRevenueProfitSharingLegalBenchClassification, + CUADRofrRofoRofnLegalBenchClassification, + CUADSourceCodeEscrowLegalBenchClassification, + CUADTerminationForConvenienceLegalBenchClassification, + CUADThirdPartyBeneficiaryLegalBenchClassification, + CUADUncappedLiabilityLegalBenchClassification, + CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification, + CUADVolumeRestrictionLegalBenchClassification, + CUADWarrantyDurationLegalBenchClassification, + CyrillicTurkicLangClassification, + CzechProductReviewSentimentClassification, + CzechSoMeSentimentClassification, + CzechSubjectivityClassification, + DalajClassification, + DanishPoliticalCommentsClassification, + DBpediaClassification, + DdiscoCohesionClassification, + DefinitionClassificationLegalBenchClassification, + Diversity1LegalBenchClassification, + Diversity2LegalBenchClassification, + Diversity3LegalBenchClassification, + Diversity4LegalBenchClassification, + Diversity5LegalBenchClassification, + Diversity6LegalBenchClassification, + DKHateClassification, + DutchBookReviewSentimentClassification, + EmotionClassification, + EstonianValenceClassification, + FilipinoHateSpeechClassification, + FilipinoShopeeReviewsClassification, + FinancialPhrasebankClassification, + FinToxicityClassification, + FrenchBookReviews, + FrenkEnClassification, + FrenkHrClassification, + FrenkSlClassification, + FunctionOfDecisionSectionLegalBenchClassification, + GeoreviewClassification, + GeorgianSentimentClassification, + GermanPoliticiansTwitterSentimentClassification, + GreekLegalCodeClassification, + GujaratiNewsClassification, + 
HateSpeechPortugueseClassification, + HeadlineClassification, + HebrewSentimentAnalysis, + HinDialectClassification, + HindiDiscourseClassification, + HotelReviewSentimentClassification, + IFlyTek, + ImdbClassification, + InappropriatenessClassification, + IndicLangClassification, + IndicNLPNewsClassification, + IndicSentimentClassification, + IndonesianIdClickbaitClassification, + IndonesianMongabayConservationClassification, + InsurancePolicyInterpretationLegalBenchClassification, + InternationalCitizenshipQuestionsLegalBenchClassification, + IsiZuluNewsClassification, + ItaCaseholdClassification, + ItalianLinguisticAcceptabilityClassification, + JavaneseIMDBClassification, + JCrewBlockerLegalBenchClassification, + JDReview, + KannadaNewsClassification, + KinopoiskClassification, + KlueTC, + KorFin, + KorHateClassification, + KorSarcasmClassification, + KurdishSentimentClassification, + LanguageClassification, + LccSentimentClassification, + LearnedHandsBenefitsLegalBenchClassification, + LearnedHandsBusinessLegalBenchClassification, + LearnedHandsConsumerLegalBenchClassification, + LearnedHandsCourtsLegalBenchClassification, + LearnedHandsCrimeLegalBenchClassification, + LearnedHandsDivorceLegalBenchClassification, + LearnedHandsDomesticViolenceLegalBenchClassification, + LearnedHandsEducationLegalBenchClassification, + LearnedHandsEmploymentLegalBenchClassification, + LearnedHandsEstatesLegalBenchClassification, + LearnedHandsFamilyLegalBenchClassification, + LearnedHandsHealthLegalBenchClassification, + LearnedHandsHousingLegalBenchClassification, + LearnedHandsImmigrationLegalBenchClassification, + LearnedHandsTortsLegalBenchClassification, + LearnedHandsTrafficLegalBenchClassification, + LegalReasoningCausalityLegalBenchClassification, + MacedonianTweetSentimentClassification, + MalayalamNewsClassification, + MarathiNewsClassification, + MasakhaNEWSClassification, + MassiveIntentClassification, + MassiveScenarioClassification, + MAUDLegalBenchClassification, 
+ Moroco, + MovieReviewSentimentClassification, + MTOPDomainClassification, + MTOPIntentClassification, + MultiHateClassification, + MultilingualSentiment, + MultilingualSentimentClassification, + MyanmarNews, + NaijaSenti, + NepaliNewsClassification, + NewsClassification, + NordicLangClassification, + NoRecClassification, + NorwegianParliamentClassification, + NusaParagraphEmotionClassification, + NusaParagraphTopicClassification, + NusaXSentiClassification, + NYSJudicialEthicsLegalBenchClassification, + OdiaNewsClassification, + OnlineShopping, + OnlineStoreReviewSentimentClassification, + OPP115DataRetentionLegalBenchClassification, + OPP115DataSecurityLegalBenchClassification, + OPP115DoNotTrackLegalBenchClassification, + OPP115FirstPartyCollectionUseLegalBenchClassification, + OPP115InternationalAndSpecificAudiencesLegalBenchClassification, + OPP115PolicyChangeLegalBenchClassification, + OPP115ThirdPartySharingCollectionLegalBenchClassification, + OPP115UserAccessEditAndDeletionLegalBenchClassification, + OPP115UserChoiceControlLegalBenchClassification, + OralArgumentQuestionPurposeLegalBenchClassification, + OverrulingLegalBenchClassification, + PacClassification, + PatentClassification, + PersianFoodSentimentClassification, + PersonalJurisdictionLegalBenchClassification, + PoemSentimentClassification, + PolEmo2InClassification, + PolEmo2OutClassification, + PROALegalBenchClassification, + PunjabiNewsClassification, + RestaurantReviewSentimentClassification, + RomanianReviewsSentiment, + RomanianSentimentClassification, + RuReviewsClassification, + RuSciBenchGRNTIClassification, + RuSciBenchOECDClassification, + SanskritShlokasClassification, + ScalaClassification, + SCDBPAccountabilityLegalBenchClassification, + SCDBPAuditsLegalBenchClassification, + SCDBPCertificationLegalBenchClassification, + SCDBPTrainingLegalBenchClassification, + SCDBPVerificationLegalBenchClassification, + SCDDAccountabilityLegalBenchClassification, + 
SCDDAuditsLegalBenchClassification, + SCDDCertificationLegalBenchClassification, + SCDDTrainingLegalBenchClassification, + SCDDVerificationLegalBenchClassification, + SentimentAnalysisHindi, + SIB200Classification, + SinhalaNewsClassification, + SinhalaNewsSourceClassification, + SiswatiNewsClassification, + SlovakHateSpeechClassification, + SlovakMovieReviewSentimentClassification, + SouthAfricanLangClassification, + SpanishNewsClassification, + SpanishSentimentClassification, + SwahiliNewsClassification, + SwedishSentimentClassification, + SweRecClassification, + SwissJudgementClassification, + TamilNewsClassification, + TelemarketingSalesRuleLegalBenchClassification, + TeluguAndhraJyotiNewsClassification, + TenKGnadClassification, + TextualismToolDictionariesLegalBenchClassification, + TextualismToolPlainLegalBenchClassification, + TNews, + ToxicChatClassification, + ToxicConversationsClassification, + TswanaNewsClassification, + TurkicClassification, + TurkishMovieSentimentClassification, + TurkishProductSentimentClassification, + TweetEmotionClassification, + TweetSarcasmClassification, + TweetSentimentClassification, + TweetSentimentExtractionClassification, + TweetTopicSingleClassification, + UCCVCommonLawLegalBenchClassification, + UkrFormalityClassification, + UnfairTOSLegalBenchClassification, + UrduRomanSentimentClassification, + VieStudentFeedbackClassification, + Waimai, + WisesightSentimentClassification, + WongnaiReviewsClassification, + WRIMEClassification, + YahooAnswersTopicsClassification, + YelpReviewFullClassification, + YueOpenriceReviewClassification, +) +from .Clustering import ( + AlloProfClusteringP2P, + AlloProfClusteringP2PFast, + AlloProfClusteringS2S, + AlloProfClusteringS2SFast, + ArxivClusteringP2P, + ArxivClusteringP2PFast, + ArxivClusteringS2S, + ArXivHierarchicalClusteringP2P, + ArXivHierarchicalClusteringS2S, + BigPatentClustering, + BigPatentClusteringFast, + BiorxivClusteringP2P, + BiorxivClusteringP2PFast, + 
BiorxivClusteringS2S, + BiorxivClusteringS2SFast, + BlurbsClusteringP2P, + BlurbsClusteringP2PFast, + BlurbsClusteringS2S, + BlurbsClusteringS2SFast, + CLSClusteringFastP2P, + CLSClusteringFastS2S, + CLSClusteringP2P, + CLSClusteringS2S, + EightTagsClustering, + EightTagsClusteringFast, + GeoreviewClusteringP2P, + HALClusteringS2S, + HALClusteringS2SFast, + IndicReviewsClusteringP2P, + LivedoorNewsClustering, + LivedoorNewsClusteringv2, + MasakhaNEWSClusteringP2P, + MasakhaNEWSClusteringS2S, + MedrxivClusteringP2P, + MedrxivClusteringP2PFast, + MedrxivClusteringS2S, + MedrxivClusteringS2SFast, + MewsC16JaClustering, + MLSUMClusteringP2P, + MLSUMClusteringP2PFast, + MLSUMClusteringS2S, + MLSUMClusteringS2SFast, + PlscClusteringP2P, + PlscClusteringP2PFast, + PlscClusteringS2S, + PlscClusteringS2SFast, + RedditClustering, + RedditClusteringP2P, + RedditFastClusteringP2P, + RedditFastClusteringS2S, + RomaniBibleClustering, + RuSciBenchGRNTIClusteringP2P, + RuSciBenchOECDClusteringP2P, + SIB200ClusteringFast, + SNLClustering, + SNLHierarchicalClusteringP2P, + SNLHierarchicalClusteringS2S, + SpanishNewsClusteringP2P, + StackExchangeClustering, + StackExchangeClusteringFast, + StackExchangeClusteringP2P, + StackExchangeClusteringP2PFast, + SwednClustering, + SwednClusteringFastS2S, + SwednClusteringP2P, + TenKGnadClusteringP2P, + TenKGnadClusteringP2PFast, + TenKGnadClusteringS2S, + TenKGnadClusteringS2SFast, + ThuNewsClusteringFastP2P, + ThuNewsClusteringFastS2S, + ThuNewsClusteringP2P, + ThuNewsClusteringS2S, + TwentyNewsgroupsClustering, + TwentyNewsgroupsClusteringFast, + VGClustering, + VGHierarchicalClusteringP2P, + VGHierarchicalClusteringS2S, + WikiCitiesClustering, + WikiClusteringFastP2P, + WikiClusteringP2P, +) +from .InstructionReranking import ( + Core17InstructionRetrieval, + News21InstructionRetrieval, + Robust04InstructionRetrieval, + mFollowIR, + mFollowIRCrossLingual, +) +from .InstructionRetrieval import InstructIR +from .MultiLabelClassification 
import ( + BrazilianToxicTweetsClassification, + CEDRClassification, + KorHateSpeechMLClassification, + MalteseNewsClassification, + MultiEURLEXMultilabelClassification, + SensitiveTopicsClassification, +) +from .PairClassification import ( + RTE3, + XNLI, + XNLIV2, + ArEntail, + ArmenianParaphrasePC, + Assin2RTE, + CdscePC, + Cmnli, + CTKFactsNLI, + FalseFriendsDeEnPC, + FarsTail, + IndicXnliPairClassification, + IndoNLI, + KlueNLI, + LegalBenchPC, + Ocnli, + OpusparcusPC, + PawsXPairClassification, + PpcPC, + PscPC, + SickBrPC, + SickePLPC, + SprintDuplicateQuestionsPC, + TERRa, + TwitterSemEval2015PC, + TwitterURLCorpusPC, + XStance, +) +from .Reranking import ( + AlloprofReranking, + AskUbuntuDupQuestions, + CMedQAv1, + CMedQAv2, + ESCIReranking, + MindSmallReranking, + MIRACLReranking, + MMarcoReranking, + NevIR, + RuBQReranking, + SciDocsReranking, + StackOverflowDupQuestions, + SyntecReranking, + T2Reranking, + VoyageMMarcoReranking, + WebLINXCandidatesReranking, + WikipediaRerankingMultilingual, +) +from .Retrieval import ( + FEVER, + MSMARCO, + MSMARCOPL, + NQ, + NQPL, + PIQA, + SCIDOCS, + SCIDOCSPL, + SIQA, + TRECCOVID, + TRECCOVIDPL, + AILACasedocs, + AILAStatutes, + AlloprofRetrieval, + AlphaNLI, + AppsRetrieval, + ARCChallenge, + ArguAna, + ArguAnaPL, + AutoRAGRetrieval, + BelebeleRetrieval, + BrightRetrieval, + BSARDRetrieval, + ClimateFEVER, + ClimateFEVERHardNegatives, + CmedqaRetrieval, + CodeEditSearchRetrieval, + CodeFeedbackMT, + CodeFeedbackST, + CodeSearchNetCCRetrieval, + CodeSearchNetRetrieval, + CodeTransOceanContestRetrieval, + CodeTransOceanDLRetrieval, + COIRCodeSearchNetRetrieval, + CosQARetrieval, + CovidRetrieval, + CQADupstackAndroidRetrieval, + CQADupstackEnglishRetrieval, + CQADupstackGamingRetrieval, + CQADupstackGisRetrieval, + CQADupstackMathematicaRetrieval, + CQADupstackPhysicsRetrieval, + CQADupstackProgrammersRetrieval, + CQADupstackStatsRetrieval, + CQADupstackTexRetrieval, + CQADupstackUnixRetrieval, + 
CQADupstackWebmastersRetrieval, + CQADupstackWordpressRetrieval, + CrossLingualSemanticDiscriminationWMT19, + CrossLingualSemanticDiscriminationWMT21, + CUREv1Retrieval, + DanFever, + DanFeverRetrieval, + DBPedia, + DBPediaHardNegatives, + DBPediaPL, + DBPediaPLHardNegatives, + DuRetrieval, + EcomRetrieval, + EstQA, + FaithDialRetrieval, + FeedbackQARetrieval, + FEVERHardNegatives, + FiQA2018, + FiQAPLRetrieval, + FQuADRetrieval, + GeorgianFAQRetrieval, + GerDaLIR, + GerDaLIRSmall, + GermanDPR, + GermanGovServiceRetrieval, + GermanQuADRetrieval, + GreekCivicsQA, + HagridRetrieval, + HellaSwag, + HotpotQA, + HotpotQAHardNegatives, + HotpotQAPL, + HotpotQAPLHardNegatives, + HunSum2AbstractiveRetrieval, + IndicQARetrieval, + JaGovFaqsRetrieval, + JaqketRetrieval, + JaQuADRetrieval, + KoStrategyQA, + LeCaRDv2, + LegalBenchConsumerContractsQA, + LegalBenchCorporateLobbying, + LegalQuAD, + LegalSummarization, + LEMBNarrativeQARetrieval, + LEMBNeedleRetrieval, + LEMBPasskeyRetrieval, + LEMBQMSumRetrieval, + LEMBSummScreenFDRetrieval, + LEMBWikimQARetrieval, + LitSearchRetrieval, + MedicalQARetrieval, + MedicalRetrieval, + MintakaRetrieval, + MIRACLRetrieval, + MIRACLRetrievalHardNegatives, + MLQARetrieval, + MLQuestionsRetrieval, + MMarcoRetrieval, + MrTidyRetrieval, + MSMARCOHardNegatives, + MSMARCOPLHardNegatives, + MSMARCOv2, + MultiLongDocRetrieval, + NarrativeQARetrieval, + NeuCLIR2022Retrieval, + NeuCLIR2022RetrievalHardNegatives, + NeuCLIR2023Retrieval, + NeuCLIR2023RetrievalHardNegatives, + NFCorpus, + NFCorpusPL, + NLPJournalAbsIntroRetrieval, + NLPJournalTitleAbsRetrieval, + NLPJournalTitleIntroRetrieval, + NorQuadRetrieval, + NQHardNegatives, + NQPLHardNegatives, + PublicHealthQARetrieval, + Quail, + QuoraPLRetrieval, + QuoraPLRetrievalHardNegatives, + QuoraRetrieval, + QuoraRetrievalHardNegatives, + RARbCode, + RARbMath, + RiaNewsRetrieval, + RiaNewsRetrievalHardNegatives, + RuBQRetrieval, + SadeemQuestionRetrieval, + SciFact, + SciFactPL, + SKQuadRetrieval, + 
SlovakSumRetrieval, + SNLRetrieval, + SpanishPassageRetrievalS2P, + SpanishPassageRetrievalS2S, + SpartQA, + StackOverflowQARetrieval, + StatcanDialogueDatasetRetrieval, + SwednRetrieval, + SweFaqRetrieval, + SyntecRetrieval, + SyntheticText2SQLRetrieval, + T2Retrieval, + TempReasonL1, + TempReasonL2Context, + TempReasonL2Fact, + TempReasonL2Pure, + TempReasonL3Context, + TempReasonL3Fact, + TempReasonL3Pure, + TopiOCQARetrieval, + TopiOCQARetrievalHardNegatives, + Touche2020, + Touche2020v3Retrieval, + TurHistQuadRetrieval, + TV2Nordretrieval, + TwitterHjerneRetrieval, + VideoRetrieval, + VieQuADRetrieval, + WikipediaRetrievalMultilingual, + WinoGrande, + XMarket, + XPQARetrieval, + XQuADRetrieval, +) +from .SpeedTask import CPUSpeedTask, GPUSpeedTask +from .STS import ( + AFQMC, + ATEC, + BQ, + JSICK, + JSTS, + LCQMC, + PAWSX, + QBQTC, + STS12STS, + STS13STS, + STS14STS, + STS15STS, + STS16STS, + STSB, + STSES, + Assin2STS, + BiossesSTS, + CdscrSTS, + FaroeseSTS, + FinParaSTS, + GermanSTSBenchmarkSTS, + IndicCrosslingualSTS, + KlueSTS, + KorSTS, + RonSTS, + RUParaPhraserSTS, + RuSTSBenchmarkSTS, + SemRel24STS, + SickBrSTS, + SickFrSTS, + SickrPLSTS, + SickrSTS, + STS17Crosslingual, + STS22CrosslingualSTS, + STS22CrosslingualSTSv2, + STSBenchmarkMultilingualSTS, + STSBenchmarkSTS, +) +from .Summarization import ( + SummEvalFrSummarization, + SummEvalFrSummarizationv2, + SummEvalSummarization, + SummEvalSummarizationv2, +) + +__all__ = [ + "CLSClusteringFastP2P", + "CLSClusteringFastS2S", + "CLSClusteringP2P", + "CLSClusteringS2S", + "ThuNewsClusteringFastP2P", + "ThuNewsClusteringFastS2S", + "ThuNewsClusteringP2P", + "ThuNewsClusteringS2S", + "SpanishNewsClusteringP2P", + "EightTagsClustering", + "EightTagsClusteringFast", + "PlscClusteringP2P", + "PlscClusteringP2PFast", + "PlscClusteringS2S", + "PlscClusteringS2SFast", + "SwednClustering", + "SwednClusteringFastS2S", + "SwednClusteringP2P", + "VGClustering", + "SNLHierarchicalClusteringP2P", + 
"SNLHierarchicalClusteringS2S", + "SNLClustering", + "VGHierarchicalClusteringP2P", + "VGHierarchicalClusteringS2S", + "BlurbsClusteringS2S", + "BlurbsClusteringS2SFast", + "TenKGnadClusteringP2P", + "TenKGnadClusteringP2PFast", + "TenKGnadClusteringS2S", + "TenKGnadClusteringS2SFast", + "BlurbsClusteringP2P", + "BlurbsClusteringP2PFast", + "RomaniBibleClustering", + "MedrxivClusteringS2S", + "MedrxivClusteringS2SFast", + "BiorxivClusteringS2S", + "BiorxivClusteringS2SFast", + "StackExchangeClustering", + "StackExchangeClusteringFast", + "RedditClustering", + "RedditFastClusteringS2S", + "ArxivClusteringS2S", + "ArxivClusteringP2P", + "ArxivClusteringP2PFast", + "MedrxivClusteringP2P", + "MedrxivClusteringP2PFast", + "WikiCitiesClustering", + "BiorxivClusteringP2P", + "BiorxivClusteringP2PFast", + "TwentyNewsgroupsClustering", + "TwentyNewsgroupsClusteringFast", + "ArXivHierarchicalClusteringP2P", + "ArXivHierarchicalClusteringS2S", + "BigPatentClustering", + "BigPatentClusteringFast", + "StackExchangeClusteringP2P", + "StackExchangeClusteringP2PFast", + "RedditClusteringP2P", + "RedditFastClusteringP2P", + "LivedoorNewsClustering", + "LivedoorNewsClusteringv2", + "MewsC16JaClustering", + "WikiClusteringFastP2P", + "WikiClusteringP2P", + "MLSUMClusteringS2S", + "MLSUMClusteringS2SFast", + "MasakhaNEWSClusteringS2S", + "MLSUMClusteringP2P", + "MLSUMClusteringP2PFast", + "IndicReviewsClusteringP2P", + "SIB200ClusteringFast", + "MasakhaNEWSClusteringP2P", + "GeoreviewClusteringP2P", + "RuSciBenchGRNTIClusteringP2P", + "RuSciBenchOECDClusteringP2P", + "HALClusteringS2S", + "HALClusteringS2SFast", + "AlloProfClusteringS2S", + "AlloProfClusteringS2SFast", + "AlloProfClusteringP2P", + "AlloProfClusteringP2PFast", + "SummEvalSummarization", + "SummEvalSummarizationv2", + "SummEvalFrSummarization", + "SummEvalFrSummarizationv2", + "GPUSpeedTask", + "CPUSpeedTask", + "News21InstructionRetrieval", + "Core17InstructionRetrieval", + "Robust04InstructionRetrieval", + 
"mFollowIR", + "mFollowIRCrossLingual", + "BrazilianToxicTweetsClassification", + "MalteseNewsClassification", + "KorHateSpeechMLClassification", + "MultiEURLEXMultilabelClassification", + "SensitiveTopicsClassification", + "CEDRClassification", + "TbilisiCityHallBitextMining", + "VieMedEVBitextMining", + "BornholmBitextMining", + "SRNCorpusBitextMining", + "IN22ConvBitextMining", + "IN22GenBitextMining", + "BUCCBitextMining", + "LinceMTBitextMining", + "NusaTranslationBitextMining", + "DiaBLaBitextMining", + "NTREXBitextMining", + "IndicGenBenchFloresBitextMining", + "NollySentiBitextMining", + "BUCCBitextMiningFast", + "PhincBitextMining", + "TatoebaBitextMining", + "NusaXBitextMining", + "IWSLT2017BitextMining", + "BibleNLPBitextMining", + "FloresBitextMining", + "RomaTalesBitextMining", + "NorwegianCourtsBitextMining", + "Cmnli", + "Ocnli", + "Assin2RTE", + "SickBrPC", + "CdscePC", + "PpcPC", + "PscPC", + "SickePLPC", + "IndoNLI", + "FalseFriendsDeEnPC", + "ArEntail", + "ArmenianParaphrasePC", + "CTKFactsNLI", + "LegalBenchPC", + "TwitterSemEval2015PC", + "TwitterURLCorpusPC", + "SprintDuplicateQuestionsPC", + "FarsTail", + "KlueNLI", + "IndicXnliPairClassification", + "OpusparcusPC", + "PawsXPairClassification", + "RTE3", + "XStance", + "XNLI", + "XNLIV2", + "TERRa", + "TeluguAndhraJyotiNewsClassification", + "IFlyTek", + "JDReview", + "MultilingualSentiment", + "OnlineShopping", + "TNews", + "Waimai", + "YueOpenriceReviewClassification", + "HateSpeechPortugueseClassification", + "SpanishNewsClassification", + "SpanishSentimentClassification", + "AllegroReviewsClassification", + "CbdClassification", + "PacClassification", + "PolEmo2InClassification", + "PolEmo2OutClassification", + "BulgarianStoreReviewSentimentClassfication", + "KurdishSentimentClassification", + "ItaCaseholdClassification", + "ItalianLinguisticAcceptabilityClassification", + "GeorgianSentimentClassification", + "DalajClassification", + "SweRecClassification", + 
"SwedishSentimentClassification", + "CSFDSKMovieReviewSentimentClassification", + "SlovakHateSpeechClassification", + "NorwegianParliamentClassification", + "NoRecClassification", + "FilipinoHateSpeechClassification", + "FilipinoShopeeReviewsClassification", + "MarathiNewsClassification", + "IndonesianIdClickbaitClassification", + "IndonesianMongabayConservationClassification", + "UrduRomanSentimentClassification", + "MacedonianTweetSentimentClassification", + "FrenkSlClassification", + "SwahiliNewsClassification", + "FinToxicityClassification", + "KannadaNewsClassification", + "TenKGnadClassification", + "GermanPoliticiansTwitterSentimentClassification", + "PunjabiNewsClassification", + "TswanaNewsClassification", + "TweetSarcasmClassification", + "TweetEmotionClassification", + "RestaurantReviewSentimentClassification", + "HotelReviewSentimentClassification", + "OnlineStoreReviewSentimentClassification", + "AJGT", + "TurkishProductSentimentClassification", + "TurkishMovieSentimentClassification", + "NepaliNewsClassification", + "VieStudentFeedbackClassification", + "DutchBookReviewSentimentClassification", + "SiswatiNewsClassification", + "UkrFormalityClassification", + "SanskritShlokasClassification", + "SlovakMovieReviewSentimentClassification", + "AngryTweetsClassification", + "DdiscoCohesionClassification", + "DanishPoliticalCommentsClassification", + "DKHateClassification", + "LccSentimentClassification", + "TamilNewsClassification", + "CSFDCZMovieReviewSentimentClassification", + "CzechSubjectivityClassification", + "CzechProductReviewSentimentClassification", + "CzechSoMeSentimentClassification", + "EstonianValenceClassification", + "MyanmarNews", + "JavaneseIMDBClassification", + "YahooAnswersTopicsClassification", + "CUADAffiliateLicenseLicenseeLegalBenchClassification", + "CUADAffiliateLicenseLicensorLegalBenchClassification", + "CUADAntiAssignmentLegalBenchClassification", + "CUADAuditRightsLegalBenchClassification", + 
"CUADCapOnLiabilityLegalBenchClassification", + "CUADChangeOfControlLegalBenchClassification", + "CUADCompetitiveRestrictionExceptionLegalBenchClassification", + "CUADCovenantNotToSueLegalBenchClassification", + "CUADEffectiveDateLegalBenchClassification", + "CUADExclusivityLegalBenchClassification", + "CUADExpirationDateLegalBenchClassification", + "CUADGoverningLawLegalBenchClassification", + "CUADIPOwnershipAssignmentLegalBenchClassification", + "CUADInsuranceLegalBenchClassification", + "CUADIrrevocableOrPerpetualLicenseLegalBenchClassification", + "CUADJointIPOwnershipLegalBenchClassification", + "CUADLicenseGrantLegalBenchClassification", + "CUADLiquidatedDamagesLegalBenchClassification", + "CUADMinimumCommitmentLegalBenchClassification", + "CUADMostFavoredNationLegalBenchClassification", + "CUADNoSolicitOfCustomersLegalBenchClassification", + "CUADNoSolicitOfEmployeesLegalBenchClassification", + "CUADNonCompeteLegalBenchClassification", + "CUADNonDisparagementLegalBenchClassification", + "CUADNonTransferableLicenseLegalBenchClassification", + "CUADNoticePeriodToTerminateRenewalLegalBenchClassification", + "CUADPostTerminationServicesLegalBenchClassification", + "CUADPriceRestrictionsLegalBenchClassification", + "CUADRenewalTermLegalBenchClassification", + "CUADRevenueProfitSharingLegalBenchClassification", + "CUADRofrRofoRofnLegalBenchClassification", + "CUADSourceCodeEscrowLegalBenchClassification", + "CUADTerminationForConvenienceLegalBenchClassification", + "CUADThirdPartyBeneficiaryLegalBenchClassification", + "CUADUncappedLiabilityLegalBenchClassification", + "CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification", + "CUADVolumeRestrictionLegalBenchClassification", + "CUADWarrantyDurationLegalBenchClassification", + "CanadaTaxCourtOutcomesLegalBenchClassification", + "ContractNLIConfidentialityOfAgreementLegalBenchClassification", + "ContractNLIExplicitIdentificationLegalBenchClassification", + 
"ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification", + "ContractNLILimitedUseLegalBenchClassification", + "ContractNLINoLicensingLegalBenchClassification", + "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification", + "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissibleCopyLegalBenchClassification", + "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification", + "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification", + "ContractNLIReturnOfConfidentialInformationLegalBenchClassification", + "ContractNLISharingWithEmployeesLegalBenchClassification", + "ContractNLISharingWithThirdPartiesLegalBenchClassification", + "ContractNLISurvivalOfObligationsLegalBenchClassification", + "CorporateLobbyingLegalBenchClassification", + "DefinitionClassificationLegalBenchClassification", + "Diversity1LegalBenchClassification", + "Diversity2LegalBenchClassification", + "Diversity3LegalBenchClassification", + "Diversity4LegalBenchClassification", + "Diversity5LegalBenchClassification", + "Diversity6LegalBenchClassification", + "FunctionOfDecisionSectionLegalBenchClassification", + "InsurancePolicyInterpretationLegalBenchClassification", + "InternationalCitizenshipQuestionsLegalBenchClassification", + "JCrewBlockerLegalBenchClassification", + "LearnedHandsBenefitsLegalBenchClassification", + "LearnedHandsBusinessLegalBenchClassification", + "LearnedHandsConsumerLegalBenchClassification", + "LearnedHandsCourtsLegalBenchClassification", + "LearnedHandsCrimeLegalBenchClassification", + "LearnedHandsDivorceLegalBenchClassification", + "LearnedHandsDomesticViolenceLegalBenchClassification", + "LearnedHandsEducationLegalBenchClassification", + "LearnedHandsEmploymentLegalBenchClassification", + "LearnedHandsEstatesLegalBenchClassification", + "LearnedHandsFamilyLegalBenchClassification", + "LearnedHandsHealthLegalBenchClassification", + 
"LearnedHandsHousingLegalBenchClassification", + "LearnedHandsImmigrationLegalBenchClassification", + "LearnedHandsTortsLegalBenchClassification", + "LearnedHandsTrafficLegalBenchClassification", + "LegalReasoningCausalityLegalBenchClassification", + "MAUDLegalBenchClassification", + "NYSJudicialEthicsLegalBenchClassification", + "OPP115DataRetentionLegalBenchClassification", + "OPP115DataSecurityLegalBenchClassification", + "OPP115DoNotTrackLegalBenchClassification", + "OPP115FirstPartyCollectionUseLegalBenchClassification", + "OPP115InternationalAndSpecificAudiencesLegalBenchClassification", + "OPP115PolicyChangeLegalBenchClassification", + "OPP115ThirdPartySharingCollectionLegalBenchClassification", + "OPP115UserAccessEditAndDeletionLegalBenchClassification", + "OPP115UserChoiceControlLegalBenchClassification", + "OralArgumentQuestionPurposeLegalBenchClassification", + "OverrulingLegalBenchClassification", + "PROALegalBenchClassification", + "PersonalJurisdictionLegalBenchClassification", + "SCDBPAccountabilityLegalBenchClassification", + "SCDBPAuditsLegalBenchClassification", + "SCDBPCertificationLegalBenchClassification", + "SCDBPTrainingLegalBenchClassification", + "SCDBPVerificationLegalBenchClassification", + "SCDDAccountabilityLegalBenchClassification", + "SCDDAuditsLegalBenchClassification", + "SCDDCertificationLegalBenchClassification", + "SCDDTrainingLegalBenchClassification", + "SCDDVerificationLegalBenchClassification", + "TelemarketingSalesRuleLegalBenchClassification", + "TextualismToolDictionariesLegalBenchClassification", + "TextualismToolPlainLegalBenchClassification", + "UCCVCommonLawLegalBenchClassification", + "UnfairTOSLegalBenchClassification", + "FinancialPhrasebankClassification", + "DBpediaClassification", + "FrenkEnClassification", + "TweetTopicSingleClassification", + "AmazonPolarityClassification", + "NewsClassification", + "ToxicChatClassification", + "YelpReviewFullClassification", + "ToxicConversationsClassification", + 
"TweetSentimentExtractionClassification", + "PatentClassification", + "ImdbClassification", + "ArxivClassification", + "EmotionClassification", + "PoemSentimentClassification", + "Banking77Classification", + "PersianFoodSentimentClassification", + "HebrewSentimentAnalysis", + "BengaliSentimentAnalysis", + "BengaliDocumentClassification", + "BengaliHateSpeechClassification", + "SinhalaNewsSourceClassification", + "SinhalaNewsClassification", + "WisesightSentimentClassification", + "WongnaiReviewsClassification", + "WRIMEClassification", + "RomanianReviewsSentiment", + "Moroco", + "RomanianSentimentClassification", + "KorSarcasmClassification", + "KorHateClassification", + "KorFin", + "KlueTC", + "IndicLangClassification", + "SouthAfricanLangClassification", + "SwissJudgementClassification", + "AmazonReviewsClassification", + "NaijaSenti", + "TurkicClassification", + "ScalaClassification", + "MultilingualSentimentClassification", + "SIB200Classification", + "NordicLangClassification", + "NusaParagraphTopicClassification", + "CyrillicTurkicLangClassification", + "IndicNLPNewsClassification", + "MassiveScenarioClassification", + "MTOPIntentClassification", + "NusaParagraphEmotionClassification", + "MultiHateClassification", + "AfriSentiClassification", + "IndicSentimentClassification", + "LanguageClassification", + "AfriSentiLangClassification", + "NusaXSentiClassification", + "MTOPDomainClassification", + "HinDialectClassification", + "CataloniaTweetClassification", + "TweetSentimentClassification", + "MassiveIntentClassification", + "AmazonCounterfactualClassification", + "MasakhaNEWSClassification", + "GujaratiNewsClassification", + "IsiZuluNewsClassification", + "KinopoiskClassification", + "HeadlineClassification", + "InappropriatenessClassification", + "RuSciBenchGRNTIClassification", + "RuSciBenchOECDClassification", + "RuReviewsClassification", + "GeoreviewClassification", + "OdiaNewsClassification", + "GreekLegalCodeClassification", + 
"MovieReviewSentimentClassification", + "FrenchBookReviews", + "FrenkHrClassification", + "HindiDiscourseClassification", + "SentimentAnalysisHindi", + "MalayalamNewsClassification", + "InstructIR", + "CmedqaRetrieval", + "CovidRetrieval", + "DuRetrieval", + "EcomRetrieval", + "MMarcoRetrieval", + "MedicalRetrieval", + "T2Retrieval", + "VideoRetrieval", + "LeCaRDv2", + "SpanishPassageRetrievalS2S", + "SpanishPassageRetrievalS2P", + "MSMARCOPL", + "MSMARCOPLHardNegatives", + "SCIDOCSPL", + "SciFactPL", + "ArguAnaPL", + "FiQAPLRetrieval", + "NFCorpusPL", + "QuoraPLRetrieval", + "QuoraPLRetrievalHardNegatives", + "TRECCOVIDPL", + "NQPL", + "NQPLHardNegatives", + "DBPediaPL", + "DBPediaPLHardNegatives", + "HotpotQAPL", + "HotpotQAPLHardNegatives", + "GeorgianFAQRetrieval", + "SwednRetrieval", + "SweFaqRetrieval", + "SlovakSumRetrieval", + "SKQuadRetrieval", + "SNLRetrieval", + "NorQuadRetrieval", + "GermanQuADRetrieval", + "GerDaLIRSmall", + "GermanDPR", + "GermanGovServiceRetrieval", + "LegalQuAD", + "GerDaLIR", + "SadeemQuestionRetrieval", + "TurHistQuadRetrieval", + "VieQuADRetrieval", + "DanFever", + "DanFeverRetrieval", + "TV2Nordretrieval", + "TwitterHjerneRetrieval", + "EstQA", + "Quail", + "Touche2020", + "Touche2020v3Retrieval", + "TempReasonL2Pure", + "LegalSummarization", + "NQ", + "NQHardNegatives", + "SIQA", + "MSMARCO", + "MSMARCOHardNegatives", + "DBPedia", + "DBPediaHardNegatives", + "NarrativeQARetrieval", + "MSMARCOv2", + "CQADupstackTexRetrieval", + "TRECCOVID", + "WinoGrande", + "QuoraRetrieval", + "QuoraRetrievalHardNegatives", + "AlphaNLI", + "LEMBNeedleRetrieval", + "LEMBPasskeyRetrieval", + "CQADupstackAndroidRetrieval", + "TempReasonL2Context", + "ARCChallenge", + "LegalBenchCorporateLobbying", + "SCIDOCS", + "MedicalQARetrieval", + "RARbCode", + "LEMBQMSumRetrieval", + "TempReasonL3Context", + "AILAStatutes", + "TopiOCQARetrieval", + "TopiOCQARetrievalHardNegatives", + "ClimateFEVER", + "ClimateFEVERHardNegatives", + 
"CQADupstackWordpressRetrieval", + "CQADupstackEnglishRetrieval", + "CQADupstackStatsRetrieval", + "MLQuestionsRetrieval", + "TempReasonL2Fact", + "CQADupstackGamingRetrieval", + "CQADupstackWebmastersRetrieval", + "CQADupstackUnixRetrieval", + "TempReasonL3Pure", + "CQADupstackPhysicsRetrieval", + "FiQA2018", + "LitSearchRetrieval", + "FeedbackQARetrieval", + "HagridRetrieval", + "FaithDialRetrieval", + "SciFact", + "CQADupstackMathematicaRetrieval", + "RARbMath", + "HellaSwag", + "PIQA", + "SpartQA", + "BrightRetrieval", + "TempReasonL1", + "HotpotQA", + "HotpotQAHardNegatives", + "LegalBenchConsumerContractsQA", + "ArguAna", + "LEMBWikimQARetrieval", + "TempReasonL3Fact", + "FEVER", + "FEVERHardNegatives", + "CQADupstackGisRetrieval", + "AILACasedocs", + "NFCorpus", + "LEMBSummScreenFDRetrieval", + "LEMBNarrativeQARetrieval", + "CQADupstackProgrammersRetrieval", + "JaGovFaqsRetrieval", + "NLPJournalAbsIntroRetrieval", + "JaqketRetrieval", + "NLPJournalTitleAbsRetrieval", + "JaQuADRetrieval", + "NLPJournalTitleIntroRetrieval", + "HunSum2AbstractiveRetrieval", + "AutoRAGRetrieval", + "KoStrategyQA", + "WikipediaRetrievalMultilingual", + "MintakaRetrieval", + "PublicHealthQARetrieval", + "CrossLingualSemanticDiscriminationWMT19", + "MultiLongDocRetrieval", + "MIRACLRetrieval", + "MIRACLRetrievalHardNegatives", + "NeuCLIR2022Retrieval", + "NeuCLIR2022RetrievalHardNegatives", + "StatcanDialogueDatasetRetrieval", + "IndicQARetrieval", + "NeuCLIR2023Retrieval", + "NeuCLIR2023RetrievalHardNegatives", + "CrossLingualSemanticDiscriminationWMT21", + "XMarket", + "XPQARetrieval", + "BelebeleRetrieval", + "CUREv1Retrieval", + "MLQARetrieval", + "XQuADRetrieval", + "MrTidyRetrieval", + "CodeTransOceanContestRetrieval", + "CodeTransOceanDLRetrieval", + "CodeFeedbackMT", + "CodeSearchNetCCRetrieval", + "StackOverflowQARetrieval", + "CodeFeedbackST", + "CosQARetrieval", + "CodeEditSearchRetrieval", + "SyntheticText2SQLRetrieval", + "AppsRetrieval", + "CodeSearchNetRetrieval", + 
"COIRCodeSearchNetRetrieval", + "RiaNewsRetrieval", + "RiaNewsRetrievalHardNegatives", + "RuBQRetrieval", + "GreekCivicsQA", + "AlloprofRetrieval", + "BSARDRetrieval", + "SyntecRetrieval", + "FQuADRetrieval", + "AFQMC", + "ATEC", + "BQ", + "LCQMC", + "PAWSX", + "QBQTC", + "STSB", + "Assin2STS", + "SickBrSTS", + "STSES", + "CdscrSTS", + "SickrPLSTS", + "FinParaSTS", + "GermanSTSBenchmarkSTS", + "STS12STS", + "STS13STS", + "BiossesSTS", + "STS15STS", + "STSBenchmarkSTS", + "SickrSTS", + "STS16STS", + "STS14STS", + "FaroeseSTS", + "JSICK", + "JSTS", + "RonSTS", + "KorSTS", + "KlueSTS", + "IndicCrosslingualSTS", + "SemRel24STS", + "STS17Crosslingual", + "STS22CrosslingualSTS", + "STS22CrosslingualSTSv2", + "STSBenchmarkMultilingualSTS", + "RUParaPhraserSTS", + "RuSTSBenchmarkSTS", + "SickFrSTS", + "CMedQAv1", + "CMedQAv2", + "MMarcoReranking", + "T2Reranking", + "AskUbuntuDupQuestions", + "WebLINXCandidatesReranking", + "StackOverflowDupQuestions", + "NevIR", + "MindSmallReranking", + "SciDocsReranking", + "VoyageMMarcoReranking", + "MIRACLReranking", + "ESCIReranking", + "WikipediaRerankingMultilingual", + "RuBQReranking", + "SyntecReranking", + "AlloprofReranking", +] diff --git a/scripts/generate_imports.py b/scripts/generate_imports.py new file mode 100644 index 0000000000..469d894441 --- /dev/null +++ b/scripts/generate_imports.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import ast +import importlib +import inspect +import os +import types +from pathlib import Path + +# Adjust this import to the correct location of AbsTask. 
+from mteb.tasks import AbsTask + +BASE_DIR = Path("../mteb/tasks") + + +def find_task_classes_in_module(full_module_name): + """Import a module and return a list of classes inheriting from AbsTask.""" + try: + mod = importlib.import_module(full_module_name) + except ImportError: + return [] + + task_classes = [] + for name, obj in inspect.getmembers(mod, inspect.isclass): + if ( + isinstance(obj, type) + and not isinstance(obj, types.GenericAlias) + and issubclass(obj, AbsTask) + and obj is not AbsTask + and not obj.__name__.startswith("AbsTask") + and not obj.__name__ == "MultilingualTask" + ): + task_classes.append(name) + return task_classes + + +def parse_all_from_init(init_path): + """Parse __all__ from an existing __init__.py file to aggregate imports.""" + if not init_path.is_file(): + return [] + with open(init_path) as f: + tree = ast.parse(f.read()) + all_assignments = [ + n + for n in tree.body + if isinstance(n, ast.Assign) + and len(n.targets) == 1 + and n.targets[0].id == "__all__" + ] + if not all_assignments: + return [] + # Expecting __all__ to be a list of strings + val = all_assignments[0].value + if isinstance(val, ast.List): + return [elt.s for elt in val.elts if isinstance(elt, ast.Str)] + return [] + + +for root, dirs, files in os.walk(BASE_DIR, topdown=False): + # Process this directory + py_files = [f for f in files if f.endswith(".py") and f != "__init__.py"] + relative_path = Path(root).relative_to(BASE_DIR.parent) + package_path = ".".join(relative_path.parts) + + # Find classes in Python files of the current directory + import_lines = [] + all_classes = [] + for py_file in py_files: + module_name = py_file[:-3] # remove .py + full_module_name = f"mteb.{package_path}.{module_name}" + task_classes = find_task_classes_in_module(full_module_name) + if task_classes: + import_line = f"from .{module_name} import {', '.join(task_classes)}" + import_lines.append(import_line) + all_classes.extend(task_classes) + + # Also aggregate subdirectories 
that have their own __init__.py and __all__ + sub_import_lines = [] + for d in dirs: + sub_init = Path(root) / d / "__init__.py" + if sub_init.exists(): + sub_all = parse_all_from_init(sub_init) + if sub_all: + # Import all from the subpackage + sub_import_line = f"from .{d} import {', '.join(sub_all)}" + import_lines.append(sub_import_line) + all_classes.extend(sub_all) + # Deduplicate classes + all_classes = list( + dict.fromkeys(all_classes) + ) # preserves order while removing duplicates + + init_path = Path(root) / "__init__.py" + with open(init_path, "w") as init_file: + # Write imports from current directory modules + for line in import_lines: + init_file.write(line + "\n") + + # Write imports from subdirectories + for line in sub_import_lines: + init_file.write(line + "\n") + + # Write __all__ + init_file.write(f"__all__ = {all_classes!r}\n") + + print(f"Updated {init_path} with imports and __all__ = {all_classes}") From 23fb64240379ce526d7a1440890b83b8a2149208 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 8 Dec 2024 20:50:39 +0300 Subject: [PATCH 72/76] fix tests --- mteb/__init__.py | 36 ++- .../Classification/Ddisco.json | 44 +++ .../GeorgianSentimentClassification.json | 38 +++ .../WongnaiReviewsClassification.json | 56 ++++ mteb/evaluation/MTEB.py | 3 +- mteb/evaluation/evaluators/model_classes.py | 2 +- mteb/models/__init__.py | 5 +- mteb/overview.py | 2 +- .../tha/WongnaiReviewsClassification.py | 6 +- tests/test_benchmark/mock_models.py | 1 + tests/test_benchmark/mock_tasks.py | 252 +----------------- tests/test_load_results/test_mteb_results.py | 4 +- 12 files changed, 193 insertions(+), 256 deletions(-) create mode 100644 mteb/descriptive_stats/Classification/Ddisco.json create mode 100644 mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json create mode 100644 mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json diff --git a/mteb/__init__.py 
b/mteb/__init__.py index 0953699ff9..e40467a28b 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -12,15 +12,31 @@ ) from mteb.evaluation import MTEB from mteb.load_results import BenchmarkResults, load_results -from mteb.models import get_model, get_model_meta, get_model_metas +from mteb.models import get_model, get_model_meta, get_model_metas, SentenceTransformerWrapper +from mteb.encoder_interface import Encoder from mteb.overview import TASKS_REGISTRY, get_task, get_tasks +from mteb.load_results.task_results import TaskResult +from mteb.abstasks import ( + AbsTask, + AbsTaskBitextMining, + AbsTaskClassification, + AbsTaskClustering, + AbsTaskClusteringFast, + AbsTaskMultilabelClassification, + AbsTaskPairClassification, + AbsTaskReranking, + AbsTaskRetrieval, + AbsTaskSpeedTask, + AbsTaskSTS, + AbsTaskSummarization, + MultilingualTask, +) from .benchmarks.benchmarks import Benchmark from .benchmarks.get_benchmark import BENCHMARK_REGISTRY, get_benchmark, get_benchmarks __version__ = version("mteb") # fetch version from install metadata - __all__ = [ "MTEB_ENG_CLASSIC", "MTEB_MAIN_RU", @@ -41,4 +57,20 @@ "BenchmarkResults", "BENCHMARK_REGISTRY", "MTEB", + "TaskResult", + "AbsTask", + "AbsTaskBitextMining", + "AbsTaskClassification", + "AbsTaskClustering", + "AbsTaskClusteringFast", + "AbsTaskMultilabelClassification", + "AbsTaskPairClassification", + "AbsTaskReranking", + "AbsTaskRetrieval", + "AbsTaskSpeedTask", + "AbsTaskSTS", + "AbsTaskSummarization", + "MultilingualTask", + "SentenceTransformerWrapper", + "Encoder", ] diff --git a/mteb/descriptive_stats/Classification/Ddisco.json b/mteb/descriptive_stats/Classification/Ddisco.json new file mode 100644 index 0000000000..c9b0bfd67e --- /dev/null +++ b/mteb/descriptive_stats/Classification/Ddisco.json @@ -0,0 +1,44 @@ +{ + "test": { + "num_samples": 201, + "number_of_characters": 200062, + "number_texts_intersect_with_train": 1, + "min_text_length": 529, + "average_text_length": 995.3333333333334, + 
"max_text_length": 2050, + "unique_text": 201, + "unique_labels": 3, + "labels": { + "2": { + "count": 76 + }, + "3": { + "count": 115 + }, + "1": { + "count": 10 + } + } + }, + "train": { + "num_samples": 801, + "number_of_characters": 779241, + "number_texts_intersect_with_train": null, + "min_text_length": 492, + "average_text_length": 972.8352059925094, + "max_text_length": 2411, + "unique_text": 796, + "unique_labels": 3, + "labels": { + "1": { + "count": 30 + }, + "2": { + "count": 325 + }, + "3": { + "count": 446 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json b/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json new file mode 100644 index 0000000000..f6e00d147e --- /dev/null +++ b/mteb/descriptive_stats/Classification/GeorgianSentimentClassification.json @@ -0,0 +1,38 @@ +{ + "test": { + "num_samples": 1200, + "number_of_characters": 141679, + "number_texts_intersect_with_train": 0, + "min_text_length": 25, + "average_text_length": 118.06583333333333, + "max_text_length": 566, + "unique_text": 1200, + "unique_labels": 2, + "labels": { + "1": { + "count": 600 + }, + "0": { + "count": 600 + } + } + }, + "train": { + "num_samples": 330, + "number_of_characters": 37706, + "number_texts_intersect_with_train": null, + "min_text_length": 19, + "average_text_length": 114.26060606060607, + "max_text_length": 315, + "unique_text": 330, + "unique_labels": 2, + "labels": { + "1": { + "count": 165 + }, + "0": { + "count": 165 + } + } + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json b/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json new file mode 100644 index 0000000000..9896719ce5 --- /dev/null +++ b/mteb/descriptive_stats/Classification/WongnaiReviewsClassification.json @@ -0,0 +1,56 @@ +{ + "test": { + "num_samples": 2048, + "number_of_characters": 1198297, + 
"number_texts_intersect_with_train": 0, + "min_text_length": 200, + "average_text_length": 585.10595703125, + "max_text_length": 14899, + "unique_text": 2048, + "unique_labels": 5, + "labels": { + "3": { + "count": 983 + }, + "1": { + "count": 68 + }, + "4": { + "count": 351 + }, + "2": { + "count": 629 + }, + "0": { + "count": 17 + } + } + }, + "train": { + "num_samples": 40000, + "number_of_characters": 21614868, + "number_texts_intersect_with_train": null, + "min_text_length": 200, + "average_text_length": 540.3717, + "max_text_length": 20557, + "unique_text": 39993, + "unique_labels": 5, + "labels": { + "2": { + "count": 12171 + }, + "3": { + "count": 18770 + }, + "4": { + "count": 6799 + }, + "1": { + "count": 1845 + }, + "0": { + "count": 415 + } + } + } +} \ No newline at end of file diff --git a/mteb/evaluation/MTEB.py b/mteb/evaluation/MTEB.py index dc8853dd8e..6ca449e1c5 100644 --- a/mteb/evaluation/MTEB.py +++ b/mteb/evaluation/MTEB.py @@ -21,7 +21,8 @@ from mteb.model_meta import ModelMeta from mteb.models import model_meta_from_sentence_transformers -from ..abstasks import AbsTask, AbsTaskReranking +from ..abstasks.AbsTask import AbsTask +from ..abstasks.AbsTaskReranking import AbsTaskReranking from ..load_results.task_results import TaskResult from ..models.sentence_transformer_wrapper import SentenceTransformerWrapper from . 
import LangMapping diff --git a/mteb/evaluation/evaluators/model_classes.py b/mteb/evaluation/evaluators/model_classes.py index 60dea56385..146d529dc9 100644 --- a/mteb/evaluation/evaluators/model_classes.py +++ b/mteb/evaluation/evaluators/model_classes.py @@ -558,5 +558,5 @@ def encode( def is_cross_encoder_compatible(model) -> bool: - op = getattr(model.model, "predict", None) + op = getattr(model, "predict", None) return callable(op) diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 3804aebcd8..78ba729cac 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging from mteb.models.overview import ( MODEL_REGISTRY, @@ -10,8 +9,7 @@ get_model_metas, model_meta_from_sentence_transformers, ) - -logger = logging.getLogger(__name__) +from .sentence_transformer_wrapper import SentenceTransformerWrapper __all__ = [ @@ -21,4 +19,5 @@ "get_model_meta", "get_model_metas", "model_meta_from_sentence_transformers", + "SentenceTransformerWrapper", ] diff --git a/mteb/overview.py b/mteb/overview.py index c664bdd178..717b196175 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -321,7 +321,7 @@ def get_task( close_matches = difflib.get_close_matches(task_name, TASKS_REGISTRY.keys()) if close_matches: suggestion = ( - f"KeyError: '{task_name}' not found. Did you mean: {close_matches[0]}?" + f"KeyError: '{task_name}' not found. Did you mean: '{close_matches[0]}'?" 
) else: suggestion = ( diff --git a/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py index 4afd64dd21..ab00a052a3 100644 --- a/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py +++ b/mteb/tasks/Classification/tha/WongnaiReviewsClassification.py @@ -6,12 +6,12 @@ class WongnaiReviewsClassification(AbsTaskClassification): metadata = TaskMetadata( - name="WongnaiReviewsClassification ", + name="WongnaiReviewsClassification", description="Wongnai features over 200,000 restaurants, beauty salons, and spas across Thailand on its platform, with detailed information about each merchant and user reviews. In this dataset there are 5 classes corressponding each star rating", reference="https://github.com/wongnai/wongnai-corpus", dataset={ - "path": "wongnai_reviews", - "revision": "e708d4545d7ab10dd2c6b5b5b2a72ca28685dae2", + "path": "Wongnai/wongnai_reviews", + "revision": "cd351eb26093aa4b232a2390a0da35e7fab21655", }, type="Classification", category="p2p", diff --git a/tests/test_benchmark/mock_models.py b/tests/test_benchmark/mock_models.py index 6b26cf67d4..7024d00113 100644 --- a/tests/test_benchmark/mock_models.py +++ b/tests/test_benchmark/mock_models.py @@ -61,6 +61,7 @@ def encode( convert_to_tensor: bool = False, device: str | None = None, normalize_embeddings: bool = False, + **kwargs: Any, ) -> list[Tensor] | ndarray | Tensor: return torch.randn(len(sentences), 10).numpy() diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 70704ebed8..6e863cd946 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -47,30 +47,8 @@ class MockClassificationTask(AbsTaskClassification): - expected_stats = { - "test": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - 
"unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - "train": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - } + expected_stats = {'test': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'train': {'num_samples': 2, 'number_of_characters': 53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}} + metadata = TaskMetadata( type="Classification", @@ -105,74 +83,8 @@ def load_data(self, **kwargs): class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask): - expected_stats = { - "test": { - "num_samples": 4, - "number_of_characters": 104, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 2}, "1": {"count": 2}}, - "hf_subset_descriptive_stats": { - "eng": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - "fra": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - 
}, - }, - "train": { - "num_samples": 4, - "number_of_characters": 104, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "unique_labels": 2, - "labels": {"0": {"count": 2}, "1": {"count": 2}}, - "hf_subset_descriptive_stats": { - "eng": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - "fra": { - "num_samples": 2, - "number_of_characters": 52, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 1}, "1": {"count": 1}}, - }, - }, - }, - } + expected_stats = {'test': {'num_samples': 4, 'number_of_characters': 104, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 2}, '1': {'count': 2}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'fra': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}}}, 'train': {'num_samples': 4, 'number_of_characters': 106, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 2}, '1': {'count': 2}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 2, 'number_of_characters': 
53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'fra': {'num_samples': 2, 'number_of_characters': 53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}}}} + metadata = TaskMetadata( type="Classification", @@ -1335,37 +1247,8 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): - expected_stats = { - "test": { - "num_samples": 4, - "number_of_characters": 154, - "num_documents": 2, - "min_document_length": 51, - "average_document_length": 51.0, - "max_document_length": 51, - "unique_documents": 2, - "num_queries": 2, - "min_query_length": 23, - "average_query_length": 26.0, - "max_query_length": 29, - "unique_queries": 2, - "none_queries": 0, - "num_relevant_docs": 4, - "min_relevant_docs_per_query": 2, - "average_relevant_docs_per_query": 1.0, - "max_relevant_docs_per_query": 2, - "unique_relevant_docs": 2, - "num_instructions": None, - "min_instruction_length": None, - "average_instruction_length": None, - "max_instruction_length": None, - "unique_instructions": None, - "num_top_ranked": None, - "min_top_ranked_per_query": None, - "average_top_ranked_per_query": None, - "max_top_ranked_per_query": None, - } - } + expected_stats = {'val': {'num_samples': 4, 'number_of_characters': 112, 'num_documents': 2, 'min_document_length': 27, 'average_document_length': 30.0, 'max_document_length': 33, 'unique_documents': 2, 'num_queries': 2, 'min_query_length': 23, 'average_query_length': 26.0, 'max_query_length': 29, 'unique_queries': 2, 'none_queries': 0, 'num_relevant_docs': 4, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 2, 'num_instructions': None, 
'min_instruction_length': None, 'average_instruction_length': None, 'max_instruction_length': None, 'unique_instructions': None, 'num_top_ranked': None, 'min_top_ranked_per_query': None, 'average_top_ranked_per_query': None, 'max_top_ranked_per_query': None}, 'test': {'num_samples': 4, 'number_of_characters': 112, 'num_documents': 2, 'min_document_length': 27, 'average_document_length': 30.0, 'max_document_length': 33, 'unique_documents': 2, 'num_queries': 2, 'min_query_length': 23, 'average_query_length': 26.0, 'max_query_length': 29, 'unique_queries': 2, 'none_queries': 0, 'num_relevant_docs': 4, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 2, 'num_instructions': None, 'min_instruction_length': None, 'average_instruction_length': None, 'max_instruction_length': None, 'unique_instructions': None, 'num_top_ranked': None, 'min_top_ranked_per_query': None, 'average_top_ranked_per_query': None, 'max_top_ranked_per_query': None}} + metadata = TaskMetadata( type="Retrieval", @@ -1545,36 +1428,8 @@ def load_data(self, **kwargs): class MockMultilabelClassification(AbsTaskMultilabelClassification): - expected_stats = { - "test": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - "train": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - } + expected_stats = 
{'test': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}, 'train': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}} + metadata = TaskMetadata( type="MultilabelClassification", @@ -1610,96 +1465,7 @@ def load_data(self, **kwargs): class MockMultilingualMultilabelClassification( AbsTaskMultilabelClassification, MultilingualTask ): - expected_stats = { - "test": { - "num_samples": 12, - "number_of_characters": 312, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 12}, "1": {"count": 12}}, - "hf_subset_descriptive_stats": { - "eng": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - "fra": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": 2, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - 
"max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - }, - }, - "train": { - "num_samples": 12, - "number_of_characters": 312, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 12}, "1": {"count": 12}}, - "hf_subset_descriptive_stats": { - "eng": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - "fra": { - "num_samples": 6, - "number_of_characters": 156, - "number_texts_intersect_with_train": None, - "min_text_length": 23, - "average_text_length": 26.0, - "max_text_length": 29, - "unique_texts": 2, - "min_labels_per_text": 2, - "average_label_per_text": 2.0, - "max_labels_per_text": 2, - "unique_labels": 2, - "labels": {"0": {"count": 6}, "1": {"count": 6}}, - }, - }, - }, - } + expected_stats = {'test': {'num_samples': 12, 'number_of_characters': 312, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 12}, '1': {'count': 12}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': 
{'0': {'count': 6}, '1': {'count': 6}}}, 'fra': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}}}, 'train': {'num_samples': 12, 'number_of_characters': 318, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 12}, '1': {'count': 12}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}, 'fra': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}}}} metadata = TaskMetadata( type="MultilabelClassification", diff --git a/tests/test_load_results/test_mteb_results.py b/tests/test_load_results/test_mteb_results.py index 84071b735f..60371f5c23 100644 --- a/tests/test_load_results/test_mteb_results.py +++ b/tests/test_load_results/test_mteb_results.py @@ -5,8 +5,8 @@ import pytest -import mteb from mteb import AbsTask +from mteb.abstasks.TaskMetadata import TaskMetadata from mteb.load_results.task_results import TaskResult tests_folder = Path(__file__).parent.parent @@ -14,7 +14,7 @@ 
class DummyTask(AbsTask): superseded_by = "newer_task" - metadata = mteb.TaskMetadata( + metadata = TaskMetadata( name="dummy_task", description="dummy task for testing", dataset={"revision": "1.0", "path": "dummy_dataset"}, From 54a7f5c779c00cfd551e6d4a30e532db6c803f9b Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Sun, 8 Dec 2024 20:51:12 +0300 Subject: [PATCH 73/76] lint --- mteb/__init__.py | 33 ++-- mteb/models/__init__.py | 3 +- mteb/overview.py | 4 +- tests/test_benchmark/mock_tasks.py | 285 ++++++++++++++++++++++++++++- 4 files changed, 297 insertions(+), 28 deletions(-) diff --git a/mteb/__init__.py b/mteb/__init__.py index e40467a28b..27458a0e3a 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -2,20 +2,6 @@ from importlib.metadata import version -from mteb.benchmarks.benchmarks import ( - MTEB_ENG_CLASSIC, - MTEB_MAIN_RU, - MTEB_RETRIEVAL_LAW, - MTEB_RETRIEVAL_MEDICAL, - MTEB_RETRIEVAL_WITH_INSTRUCTIONS, - CoIR, -) -from mteb.evaluation import MTEB -from mteb.load_results import BenchmarkResults, load_results -from mteb.models import get_model, get_model_meta, get_model_metas, SentenceTransformerWrapper -from mteb.encoder_interface import Encoder -from mteb.overview import TASKS_REGISTRY, get_task, get_tasks -from mteb.load_results.task_results import TaskResult from mteb.abstasks import ( AbsTask, AbsTaskBitextMining, @@ -31,6 +17,25 @@ AbsTaskSummarization, MultilingualTask, ) +from mteb.benchmarks.benchmarks import ( + MTEB_ENG_CLASSIC, + MTEB_MAIN_RU, + MTEB_RETRIEVAL_LAW, + MTEB_RETRIEVAL_MEDICAL, + MTEB_RETRIEVAL_WITH_INSTRUCTIONS, + CoIR, +) +from mteb.encoder_interface import Encoder +from mteb.evaluation import MTEB +from mteb.load_results import BenchmarkResults, load_results +from mteb.load_results.task_results import TaskResult +from mteb.models import ( + SentenceTransformerWrapper, + get_model, + get_model_meta, + get_model_metas, +) +from mteb.overview import TASKS_REGISTRY, get_task, 
get_tasks from .benchmarks.benchmarks import Benchmark from .benchmarks.get_benchmark import BENCHMARK_REGISTRY, get_benchmark, get_benchmarks diff --git a/mteb/models/__init__.py b/mteb/models/__init__.py index 78ba729cac..ce63e85798 100644 --- a/mteb/models/__init__.py +++ b/mteb/models/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations - from mteb.models.overview import ( MODEL_REGISTRY, ModelMeta, @@ -9,8 +8,8 @@ get_model_metas, model_meta_from_sentence_transformers, ) -from .sentence_transformer_wrapper import SentenceTransformerWrapper +from .sentence_transformer_wrapper import SentenceTransformerWrapper __all__ = [ "MODEL_REGISTRY", diff --git a/mteb/overview.py b/mteb/overview.py index 717b196175..b3a61e73ec 100644 --- a/mteb/overview.py +++ b/mteb/overview.py @@ -320,9 +320,7 @@ def get_task( if task_name not in TASKS_REGISTRY: close_matches = difflib.get_close_matches(task_name, TASKS_REGISTRY.keys()) if close_matches: - suggestion = ( - f"KeyError: '{task_name}' not found. Did you mean: '{close_matches[0]}'?" - ) + suggestion = f"KeyError: '{task_name}' not found. Did you mean: '{close_matches[0]}'?" else: suggestion = ( f"KeyError: '{task_name}' not found and no similar keys were found." 
diff --git a/tests/test_benchmark/mock_tasks.py b/tests/test_benchmark/mock_tasks.py index 6e863cd946..7a096828e5 100644 --- a/tests/test_benchmark/mock_tasks.py +++ b/tests/test_benchmark/mock_tasks.py @@ -47,8 +47,30 @@ class MockClassificationTask(AbsTaskClassification): - expected_stats = {'test': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'train': {'num_samples': 2, 'number_of_characters': 53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}} - + expected_stats = { + "test": { + "num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "train": { + "num_samples": 2, + "number_of_characters": 53, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + } metadata = TaskMetadata( type="Classification", @@ -83,8 +105,78 @@ def load_data(self, **kwargs): class MockMultilingualClassificationTask(AbsTaskClassification, MultilingualTask): - expected_stats = {'test': {'num_samples': 4, 'number_of_characters': 104, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 2}, '1': {'count': 2}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 
1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'fra': {'num_samples': 2, 'number_of_characters': 52, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}}}, 'train': {'num_samples': 4, 'number_of_characters': 106, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 2}, '1': {'count': 2}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 2, 'number_of_characters': 53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}, 'fra': {'num_samples': 2, 'number_of_characters': 53, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 1}, '1': {'count': 1}}}}}} - + expected_stats = { + "test": { + "num_samples": 4, + "number_of_characters": 104, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "fra": { + "num_samples": 2, + "number_of_characters": 52, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + 
"average_text_length": 26.0, + "max_text_length": 29, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + }, + }, + "train": { + "num_samples": 4, + "number_of_characters": 106, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 2}, "1": {"count": 2}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 2, + "number_of_characters": 53, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + "fra": { + "num_samples": 2, + "number_of_characters": 53, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 1}, "1": {"count": 1}}, + }, + }, + }, + } metadata = TaskMetadata( type="Classification", @@ -1247,8 +1339,66 @@ def load_data(self, **kwargs): class MockRetrievalTask(AbsTaskRetrieval): - expected_stats = {'val': {'num_samples': 4, 'number_of_characters': 112, 'num_documents': 2, 'min_document_length': 27, 'average_document_length': 30.0, 'max_document_length': 33, 'unique_documents': 2, 'num_queries': 2, 'min_query_length': 23, 'average_query_length': 26.0, 'max_query_length': 29, 'unique_queries': 2, 'none_queries': 0, 'num_relevant_docs': 4, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 2, 'num_instructions': None, 'min_instruction_length': None, 'average_instruction_length': None, 'max_instruction_length': None, 'unique_instructions': None, 'num_top_ranked': None, 'min_top_ranked_per_query': None, 'average_top_ranked_per_query': None, 'max_top_ranked_per_query': None}, 
'test': {'num_samples': 4, 'number_of_characters': 112, 'num_documents': 2, 'min_document_length': 27, 'average_document_length': 30.0, 'max_document_length': 33, 'unique_documents': 2, 'num_queries': 2, 'min_query_length': 23, 'average_query_length': 26.0, 'max_query_length': 29, 'unique_queries': 2, 'none_queries': 0, 'num_relevant_docs': 4, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 2, 'num_instructions': None, 'min_instruction_length': None, 'average_instruction_length': None, 'max_instruction_length': None, 'unique_instructions': None, 'num_top_ranked': None, 'min_top_ranked_per_query': None, 'average_top_ranked_per_query': None, 'max_top_ranked_per_query': None}} - + expected_stats = { + "val": { + "num_samples": 4, + "number_of_characters": 112, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "none_queries": 0, + "num_relevant_docs": 4, + "min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + }, + "test": { + "num_samples": 4, + "number_of_characters": 112, + "num_documents": 2, + "min_document_length": 27, + "average_document_length": 30.0, + "max_document_length": 33, + "unique_documents": 2, + "num_queries": 2, + "min_query_length": 23, + "average_query_length": 26.0, + "max_query_length": 29, + "unique_queries": 2, + "none_queries": 0, + "num_relevant_docs": 4, + 
"min_relevant_docs_per_query": 2, + "average_relevant_docs_per_query": 1.0, + "max_relevant_docs_per_query": 2, + "unique_relevant_docs": 2, + "num_instructions": None, + "min_instruction_length": None, + "average_instruction_length": None, + "max_instruction_length": None, + "unique_instructions": None, + "num_top_ranked": None, + "min_top_ranked_per_query": None, + "average_top_ranked_per_query": None, + "max_top_ranked_per_query": None, + }, + } metadata = TaskMetadata( type="Retrieval", @@ -1428,8 +1578,36 @@ def load_data(self, **kwargs): class MockMultilabelClassification(AbsTaskMultilabelClassification): - expected_stats = {'test': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}, 'train': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}} - + expected_stats = { + "test": { + "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "train": { + "num_samples": 6, + "number_of_characters": 159, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 
2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + } metadata = TaskMetadata( type="MultilabelClassification", @@ -1465,7 +1643,96 @@ def load_data(self, **kwargs): class MockMultilingualMultilabelClassification( AbsTaskMultilabelClassification, MultilingualTask ): - expected_stats = {'test': {'num_samples': 12, 'number_of_characters': 312, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 12}, '1': {'count': 12}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}, 'fra': {'num_samples': 6, 'number_of_characters': 156, 'number_texts_intersect_with_train': 1, 'min_text_length': 23, 'average_text_length': 26.0, 'max_text_length': 29, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}}}, 'train': {'num_samples': 12, 'number_of_characters': 318, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 12}, '1': {'count': 12}}, 'hf_subset_descriptive_stats': {'eng': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 
'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}, 'fra': {'num_samples': 6, 'number_of_characters': 159, 'number_texts_intersect_with_train': None, 'min_text_length': 23, 'average_text_length': 26.5, 'max_text_length': 30, 'unique_texts': 2, 'min_labels_per_text': 2, 'average_label_per_text': 2.0, 'max_labels_per_text': 2, 'unique_labels': 2, 'labels': {'0': {'count': 6}, '1': {'count': 6}}}}}} + expected_stats = { + "test": { + "num_samples": 12, + "number_of_characters": 312, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "fra": { + "num_samples": 6, + "number_of_characters": 156, + "number_texts_intersect_with_train": 1, + "min_text_length": 23, + "average_text_length": 26.0, + "max_text_length": 29, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + }, + }, + "train": { + "num_samples": 12, + "number_of_characters": 318, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + 
"unique_labels": 2, + "labels": {"0": {"count": 12}, "1": {"count": 12}}, + "hf_subset_descriptive_stats": { + "eng": { + "num_samples": 6, + "number_of_characters": 159, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + "fra": { + "num_samples": 6, + "number_of_characters": 159, + "number_texts_intersect_with_train": None, + "min_text_length": 23, + "average_text_length": 26.5, + "max_text_length": 30, + "unique_texts": 2, + "min_labels_per_text": 2, + "average_label_per_text": 2.0, + "max_labels_per_text": 2, + "unique_labels": 2, + "labels": {"0": {"count": 6}, "1": {"count": 6}}, + }, + }, + }, + } metadata = TaskMetadata( type="MultilabelClassification", From d67225bca1b1fa1a82c839b34d5415776016cd6a Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:29:00 +0300 Subject: [PATCH 74/76] update imports --- mteb/__init__.py | 28 -------------------- mteb/abstasks/__init__.py | 2 ++ tests/test_TaskMetadata.py | 3 +-- tests/test_load_results/test_mteb_results.py | 3 +-- 4 files changed, 4 insertions(+), 32 deletions(-) diff --git a/mteb/__init__.py b/mteb/__init__.py index 27458a0e3a..ff4e065efb 100644 --- a/mteb/__init__.py +++ b/mteb/__init__.py @@ -2,21 +2,6 @@ from importlib.metadata import version -from mteb.abstasks import ( - AbsTask, - AbsTaskBitextMining, - AbsTaskClassification, - AbsTaskClustering, - AbsTaskClusteringFast, - AbsTaskMultilabelClassification, - AbsTaskPairClassification, - AbsTaskReranking, - AbsTaskRetrieval, - AbsTaskSpeedTask, - AbsTaskSTS, - AbsTaskSummarization, - MultilingualTask, -) from mteb.benchmarks.benchmarks import ( MTEB_ENG_CLASSIC, MTEB_MAIN_RU, @@ -63,19 +48,6 @@ "BENCHMARK_REGISTRY", "MTEB", 
"TaskResult", - "AbsTask", - "AbsTaskBitextMining", - "AbsTaskClassification", - "AbsTaskClustering", - "AbsTaskClusteringFast", - "AbsTaskMultilabelClassification", - "AbsTaskPairClassification", - "AbsTaskReranking", - "AbsTaskRetrieval", - "AbsTaskSpeedTask", - "AbsTaskSTS", - "AbsTaskSummarization", - "MultilingualTask", "SentenceTransformerWrapper", "Encoder", ] diff --git a/mteb/abstasks/__init__.py b/mteb/abstasks/__init__.py index 83c2a6d1df..a95eeaf09d 100644 --- a/mteb/abstasks/__init__.py +++ b/mteb/abstasks/__init__.py @@ -13,6 +13,7 @@ from .AbsTaskSTS import AbsTaskSTS from .AbsTaskSummarization import AbsTaskSummarization from .MultilingualTask import MultilingualTask +from .TaskMetadata import TaskMetadata __all__ = [ "AbsTask", @@ -28,4 +29,5 @@ "AbsTaskSTS", "AbsTaskSummarization", "MultilingualTask", + "TaskMetadata", ] diff --git a/tests/test_TaskMetadata.py b/tests/test_TaskMetadata.py index 873264bc80..f3e6b48260 100644 --- a/tests/test_TaskMetadata.py +++ b/tests/test_TaskMetadata.py @@ -3,8 +3,7 @@ import pytest from pydantic import ValidationError -from mteb import AbsTask -from mteb.abstasks.TaskMetadata import TaskMetadata +from mteb.abstasks import AbsTask, TaskMetadata from mteb.overview import get_tasks # Historic datasets without filled metadata. Do NOT add new datasets to this list. 
diff --git a/tests/test_load_results/test_mteb_results.py b/tests/test_load_results/test_mteb_results.py index 60371f5c23..a83eeb7979 100644 --- a/tests/test_load_results/test_mteb_results.py +++ b/tests/test_load_results/test_mteb_results.py @@ -5,8 +5,7 @@ import pytest -from mteb import AbsTask -from mteb.abstasks.TaskMetadata import TaskMetadata +from mteb.abstasks import AbsTask, TaskMetadata from mteb.load_results.task_results import TaskResult tests_folder = Path(__file__).parent.parent From 8653c27899a3a3d72dfcf5e6a35d6f53fbe24aa2 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:43:03 +0300 Subject: [PATCH 75/76] fix tests --- tests/test_benchmark/test_benchmark.py | 25 +++++++++++++------------ tests/test_reproducible_workflow.py | 7 ++++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 660dd50c80..2ac30e46f4 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -12,6 +12,7 @@ import mteb import mteb.overview +from mteb.abstasks import AbsTask from mteb.benchmarks.benchmarks import Benchmark from mteb.create_meta import generate_readme @@ -38,7 +39,7 @@ @pytest.mark.parametrize("tasks", [MOCK_TASK_TEST_GRID]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) def test_mulitple_mteb_tasks( - tasks: list[mteb.AbsTask], model: mteb.Encoder, tmp_path: Path + tasks: list[AbsTask], model: mteb.Encoder, tmp_path: Path ): """Test that multiple tasks can be run""" eval = mteb.MTEB(tasks=tasks) @@ -57,7 +58,7 @@ def test_mulitple_mteb_tasks( MockTorchbf16Encoder(), ], ) -def test_benchmark_encoders_on_task(task: str | mteb.AbsTask, model: mteb.Encoder): +def test_benchmark_encoders_on_task(task: str | AbsTask, model: mteb.Encoder): """Test that a task can be fetched and run using a variety of encoders""" if isinstance(task, str): tasks = 
mteb.get_tasks(tasks=[task]) @@ -70,7 +71,7 @@ def test_benchmark_encoders_on_task(task: str | mteb.AbsTask, model: mteb.Encode @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID[:1]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_reload_results(task: str | mteb.AbsTask, model: mteb.Encoder, tmp_path: Path): +def test_reload_results(task: str | AbsTask, model: mteb.Encoder, tmp_path: Path): """Test that when rerunning the results are reloaded correctly""" if isinstance(task, str): tasks = mteb.get_tasks(tasks=[task]) @@ -91,12 +92,12 @@ def test_reload_results(task: str | mteb.AbsTask, model: mteb.Encoder, tmp_path: @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_prompt_name_passed_to_all_encodes(task_name: str | mteb.AbsTask): +def test_prompt_name_passed_to_all_encodes(task_name: str | AbsTask): """Test that all tasks correctly pass down the prompt_name to the encoder which supports it, and that the encoder which does not support it does not receive it. 
""" _task_name = ( - task_name.metadata.name if isinstance(task_name, mteb.AbsTask) else task_name + task_name.metadata.name if isinstance(task_name, AbsTask) else task_name ) class MockEncoderWithInstructions(mteb.Encoder): @@ -109,7 +110,7 @@ def encode(self, sentences, **kwargs): assert kwargs["prompt_name"] is None return super().encode(sentences, **kwargs) - if isinstance(task_name, mteb.AbsTask): + if isinstance(task_name, AbsTask): tasks = [task_name] else: tasks = mteb.get_tasks(tasks=[task_name]) @@ -134,7 +135,7 @@ def encode(self, sentences, **kwargs): @pytest.mark.parametrize("task_name", MOCK_TASK_TEST_GRID) -def test_encode_kwargs_passed_to_all_encodes(task_name: str | mteb.AbsTask): +def test_encode_kwargs_passed_to_all_encodes(task_name: str | AbsTask): """Test that all tasks correctly pass down the encode_kwargs to the encoder.""" my_encode_kwargs = {"no_one_uses_this_args": "but_its_here"} @@ -147,7 +148,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ) return np.zeros((len(sentences), 10)) - if isinstance(task_name, mteb.AbsTask): + if isinstance(task_name, AbsTask): tasks = [task_name] else: tasks = mteb.get_tasks(tasks=[task_name]) @@ -212,12 +213,12 @@ def test_get_benchmark(name): @pytest.mark.parametrize("task", MOCK_TASK_TEST_GRID) @pytest.mark.parametrize("is_task_name", [True, False]) def test_prompt_name_passed_to_all_encodes_with_prompts( - task: mteb.AbsTask | str, is_task_name: bool + task: AbsTask | str, is_task_name: bool ): """Test that all tasks and task_types correctly pass down the prompt_name to the encoder with prompts.""" - _task_name = task.metadata.name if isinstance(task, mteb.AbsTask) else task + _task_name = task.metadata.name if isinstance(task, AbsTask) else task - if isinstance(task, mteb.AbsTask): + if isinstance(task, AbsTask): tasks = [task] _task_type = task.metadata.type else: @@ -276,7 +277,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ) 
@pytest.mark.parametrize("is_task_name", [True, False]) def test_model_query_passage_prompts_task_type( - task: mteb.AbsTask | str, is_task_name: bool + task: AbsTask | str, is_task_name: bool ): """Test that the model with prompts is correctly called.""" tasks = [task] diff --git a/tests/test_reproducible_workflow.py b/tests/test_reproducible_workflow.py index 308153d2a9..04ca6c0acc 100644 --- a/tests/test_reproducible_workflow.py +++ b/tests/test_reproducible_workflow.py @@ -6,6 +6,7 @@ import mteb from mteb import MTEB +from mteb.abstasks import AbsTask from mteb.encoder_interface import Encoder from mteb.model_meta import ModelMeta from mteb.models.wrapper import Wrapper @@ -23,7 +24,7 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio task = mteb.get_task(task_name) assert isinstance(model_meta, ModelMeta) - assert isinstance(task, mteb.AbsTask) + assert isinstance(task, AbsTask) model = mteb.get_model(model_name, revision=model_revision) assert isinstance(model, Encoder) @@ -50,8 +51,8 @@ def test_reproducibility_workflow(task_name: str, model_name: str, model_revisio "Speed", ], ) -def test_validate_task_to_prompt_name(task_name: str | mteb.AbsTask): - if isinstance(task_name, mteb.AbsTask): +def test_validate_task_to_prompt_name(task_name: str | AbsTask): + if isinstance(task_name, AbsTask): task_names = [task_name.metadata.name] else: task_names = [task_name] From 4ba6ff58502b12d45caf9d756c96304a5bfa0aee Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:43:41 +0300 Subject: [PATCH 76/76] lint --- tests/test_benchmark/test_benchmark.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index 2ac30e46f4..b654bd62ea 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -38,9 +38,7 @@ @pytest.mark.parametrize("tasks", 
[MOCK_TASK_TEST_GRID]) @pytest.mark.parametrize("model", [MockNumpyEncoder()]) -def test_mulitple_mteb_tasks( - tasks: list[AbsTask], model: mteb.Encoder, tmp_path: Path -): +def test_mulitple_mteb_tasks(tasks: list[AbsTask], model: mteb.Encoder, tmp_path: Path): """Test that multiple tasks can be run""" eval = mteb.MTEB(tasks=tasks) eval.run(model, output_folder=str(tmp_path), overwrite_results=True) @@ -276,9 +274,7 @@ def encode(self, sentences, prompt_name: str | None = None, **kwargs): ], ) @pytest.mark.parametrize("is_task_name", [True, False]) -def test_model_query_passage_prompts_task_type( - task: AbsTask | str, is_task_name: bool -): +def test_model_query_passage_prompts_task_type(task: AbsTask | str, is_task_name: bool): """Test that the model with prompts is correctly called.""" tasks = [task]