Skip to content

Commit

Permalink
Rework utf8, bool return
Browse files (browse the repository at this point in the history)
  • Loading branch information
nyurik committed May 13, 2024
1 parent dac157a commit 4e778a1
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 89 deletions.
59 changes: 24 additions & 35 deletions src/enc/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ use super::metablock::{
};
pub use super::parameters::BrotliEncoderParameter;
use super::static_dict::{kNumDistanceCacheEntries, BrotliGetDictionary};
use super::utf8_util::BrotliIsMostlyUTF8;
use super::util::Log2FloorNonZero;
use crate::enc::floatX;
use crate::enc::input_pair::InputReferenceMut;
use crate::enc::utf8_util::is_mostly_utf8;

//fn BrotliCreateHqZopfliBackwardReferences(m: &mut [MemoryManager],
// dictionary: &[BrotliDictionary],
Expand Down Expand Up @@ -1283,46 +1284,35 @@ fn InitOrStitchToPreviousBlock<Alloc: alloc::Allocator<u16> + alloc::Allocator<u
handle.StitchToPreviousBlock(input_size, position, data, mask);
}

// NOTE(review): this hunk is a diff rendering without +/- markers. The removed
// `ShouldCompress` lines (i32 result, `kSampleRate`/`kMinEntropy` statics, manual
// `while` loop) and the added `should_compress` lines (bool result, `K_*` consts,
// `for` loop) are interleaved. The comments describe the logic both versions share.
//
/// Heuristic that reports whether a block of `bytes` input bytes should be compressed.
///
/// Returns "do not compress" (`false` in the new version, `0` in the old one) only
/// when all three conditions hold:
/// 1. `num_commands` is below `(bytes >> 8) + 2`,
/// 2. `num_literals` exceeds 99% of `bytes`,
/// 3. the entropy reported by `BitsEntropy` for a histogram of every 13th byte
///    exceeds `bytes * 7.92 / 13`.
/// In every other case it returns "compress" (`true` / `1`).
///
/// `data` is indexed as `data[pos & mask]`, starting at `last_flush_pos` truncated to
/// `u32` — presumably a ring buffer with `mask = size - 1`; TODO confirm against caller.
fn ShouldCompress(
fn should_compress(
data: &[u8],
mask: usize,
last_flush_pos: u64,
bytes: usize,
num_literals: usize,
num_commands: usize,
) -> i32 {
// Cheap pre-filter: only sample the data when commands are sparse and almost
// every byte was emitted as a literal.
if num_commands < (bytes >> 8).wrapping_add(2)
&& num_literals as (super::util::floatX)
> 0.99 as super::util::floatX * bytes as (super::util::floatX)
{
) -> bool {
// Sample every 13th byte; 7.92 is the per-sampled-byte entropy limit used below.
const K_SAMPLE_RATE: u32 = 13;
const K_MIN_ENTROPY: floatX = 7.92;

if num_commands < (bytes >> 8) + 2 && num_literals as floatX > 0.99 * bytes as floatX {
let mut literal_histo = [0u32; 256];
static kSampleRate: u32 = 13u32;
static kMinEntropy: super::util::floatX = 7.92 as super::util::floatX;
// The threshold is divided by the sample rate because only one byte in 13 is counted.
let bit_cost_threshold: super::util::floatX =
bytes as (super::util::floatX) * kMinEntropy / kSampleRate as (super::util::floatX);
// t = ceil(bytes / sample rate): the number of samples taken.
let t: usize = bytes
.wrapping_add(kSampleRate as usize)
let bit_cost_threshold = bytes as floatX * K_MIN_ENTROPY / K_SAMPLE_RATE as floatX;
let t = bytes
.wrapping_add(K_SAMPLE_RATE as usize)
.wrapping_sub(1)
.wrapping_div(kSampleRate as usize);
// The position is kept as u32 and advanced with wrapping arithmetic; it is masked
// before indexing `data`.
let mut pos: u32 = last_flush_pos as u32;
let mut i: usize;
i = 0usize;
while i < t {
{
{
let _rhs = 1;
let _lhs = &mut literal_histo[data[(pos as usize & mask)] as usize];
*_lhs = (*_lhs).wrapping_add(_rhs as u32);
}
pos = pos.wrapping_add(kSampleRate);
}
i = i.wrapping_add(1);
}
// `BitsEntropy` is defined elsewhere; presumably it returns the total entropy in
// bits of the 256-bin histogram — TODO confirm.
if BitsEntropy(&literal_histo[..], 256usize) > bit_cost_threshold {
return 0i32;
.wrapping_div(K_SAMPLE_RATE as usize);
let mut pos = last_flush_pos as u32;
for _ in 0..t {
let value = &mut literal_histo[data[pos as usize & mask] as usize];
*value = value.wrapping_add(1);
pos = pos.wrapping_add(K_SAMPLE_RATE);
}
if BitsEntropy(&literal_histo[..], 256) > bit_cost_threshold {
return false;
}
}
1i32
true
}

/* Chooses the literal context mode for a metablock */
Expand All @@ -1342,7 +1332,7 @@ fn ChooseContextMode(
BrotliEncoderMode::BROTLI_FORCE_SIGNED_PRIOR => return ContextType::CONTEXT_SIGNED,
_ => {}
}
if (params.quality >= 10 && BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio) == 0) {
if (params.quality >= 10 && !is_mostly_utf8(data, pos, mask, length, kMinUTF8Ratio)) {
return ContextType::CONTEXT_SIGNED;
}
ContextType::CONTEXT_UTF8
Expand Down Expand Up @@ -1939,15 +1929,14 @@ fn WriteMetaBlockInternal<Alloc: BrotliAlloc, Cb>(
*storage_ix = storage_ix.wrapping_add(7u32 as usize) & !7u32 as usize;
return;
}
if ShouldCompress(
if !should_compress(
data,
mask,
last_flush_pos,
bytes,
num_literals,
num_commands,
) == 0
{
) {
dist_cache[..4].clone_from_slice(&saved_dist_cache[..4]);
BrotliStoreUncompressedMetaBlock(
alloc,
Expand Down
4 changes: 2 additions & 2 deletions src/enc/literal_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

use core::cmp::min;

use super::utf8_util::BrotliIsMostlyUTF8;
use super::util::FastLog2f64;
use crate::enc::utf8_util::is_mostly_utf8;

static kMinUTF8Ratio: super::util::floatX = 0.75 as super::util::floatX;

Expand Down Expand Up @@ -184,7 +184,7 @@ pub fn BrotliEstimateBitCostsForLiterals(
data: &[u8],
cost: &mut [super::util::floatX],
) {
if BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio) != 0 {
if is_mostly_utf8(data, pos, mask, len, kMinUTF8Ratio) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
} else {
let mut histogram: [usize; 256] = [0; 256];
Expand Down
100 changes: 48 additions & 52 deletions src/enc/utf8_util.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,73 @@
#![allow(dead_code)]
use crate::enc::floatX;

static kMinUTF8Ratio: super::util::floatX = 0.75 as super::util::floatX;

// NOTE(review): this hunk is a diff rendering without +/- markers. The removed
// `BrotliParseAsUTF8` lines (out-parameter `symbol`, returns the byte count) and the
// added `parse_as_utf8` lines (returns `(byte count, symbol)`) are interleaved.
// The comments describe the decoding logic both versions share.
//
/// Decodes one UTF-8 sequence from the start of `input`, looking at no more than
/// `size` bytes.
///
/// On success yields the sequence length (1 to 4) and the decoded code point.
/// If no branch matches, yields length 1 and `0x110000 | input[0]`, a value above the
/// largest accepted code point (`0x10ffff`), which marks the byte as not valid UTF-8.
///
/// `input[0]` is read unconditionally, so an empty `input` panics. The continuation
/// bytes are guarded by `size`, not by `input.len()`.
/// NOTE(review): assumes the caller guarantees `input` holds at least `size` bytes
/// whenever a multi-byte lead byte is present — confirm.
fn BrotliParseAsUTF8(symbol: &mut i32, input: &[u8], size: usize) -> usize {
// One-byte form: high bit clear. A zero byte is not accepted here and falls
// through to the invalid-byte result at the end.
if input[0] & 0x80 == 0 {
*symbol = input[0] as i32;
if *symbol > 0i32 {
return 1usize;
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
if (input[0] & 0x80) == 0 {
if input[0] > 0 {
return (1, i32::from(input[0]));
}
}
// Two-byte form: 110xxxxx 10xxxxxx. Values <= 0x7f are rejected because they
// would be overlong encodings of one-byte code points.
if size > 1u32 as usize
&& (input[0] as i32 & 0xe0i32 == 0xc0i32)
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
{
*symbol = (input[0] as i32 & 0x1fi32) << 6 | input[1] as i32 & 0x3fi32;
if *symbol > 0x7fi32 {
return 2usize;
if size > 1 && (input[0] & 0xe0) == 0xc0 && (input[1] & 0xc0) == 0x80 {
let symbol = (input[0] as i32 & 0x1f) << 6 | input[1] as i32 & 0x3f;
if symbol > 0x7f {
return (2, symbol);
}
}
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx. Values <= 0x7ff are rejected as
// overlong. NOTE(review): the only range check is `> 0x7ff`, so surrogate code
// points (0xd800..=0xdfff) are accepted by this branch.
if size > 2u32 as usize
&& (input[0] as i32 & 0xf0i32 == 0xe0i32)
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
&& (input[2] as i32 & 0xc0i32 == 0x80i32)
if size > 2
&& (input[0] & 0xf0) == 0xe0
&& (input[1] & 0xc0) == 0x80
&& (input[2] & 0xc0) == 0x80
{
*symbol = (input[0] as i32 & 0xfi32) << 12
| (input[1] as i32 & 0x3fi32) << 6
| input[2] as i32 & 0x3fi32;
if *symbol > 0x7ffi32 {
return 3usize;
let symbol = (i32::from(input[0]) & 0x0f) << 12
| (i32::from(input[1]) & 0x3f) << 6
| i32::from(input[2]) & 0x3f;
if symbol > 0x7ff {
return (3, symbol);
}
}
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Accepted only when the value
// lies in 0x10000..=0x10ffff (not overlong, not above the accepted maximum).
if size > 3u32 as usize
&& (input[0] as i32 & 0xf8i32 == 0xf0i32)
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
&& (input[2] as i32 & 0xc0i32 == 0x80i32)
&& (input[3] as i32 & 0xc0i32 == 0x80i32)
if size > 3
&& (input[0] & 0xf8) == 0xf0
&& (input[1] & 0xc0) == 0x80
&& (input[2] & 0xc0) == 0x80
&& (input[3] & 0xc0) == 0x80
{
*symbol = (input[0] as i32 & 0x7i32) << 18
| (input[1] as i32 & 0x3fi32) << 12
| (input[2] as i32 & 0x3fi32) << 6
| input[3] as i32 & 0x3fi32;
if *symbol > 0xffffi32 && (*symbol <= 0x10ffffi32) {
return 4usize;
let symbol = (i32::from(input[0]) & 0x07) << 18
| (i32::from(input[1]) & 0x3f) << 12
| (i32::from(input[2]) & 0x3f) << 6
| i32::from(input[3]) & 0x3f;
if symbol > 0xffff && symbol <= 0x10_ffff {
return (4, symbol);
}
}
// Nothing matched: consume one byte and tag it with the 0x110000 bit so callers
// can tell it apart from every accepted code point.
*symbol = 0x110000i32 | input[0] as i32;
1usize

(1, 0x11_0000 | i32::from(input[0]))
}

// NOTE(review): this hunk is a diff rendering without +/- markers. The two
// `min_fraction` lines are the removed and added spellings of the same parameter, and
// the two `let mut ...` lines are the removed start of the old body. In the new
// version the body is the single forwarding call.
//
/// Deprecated entry point that keeps the original `i32`-returning signature.
///
/// Forwards all arguments to `is_mostly_utf8` and converts its `bool` result with
/// `.into()`, which yields `1` for `true` and `0` for `false`.
#[deprecated(note = "Use is_mostly_utf8 instead")]
pub fn BrotliIsMostlyUTF8(
data: &[u8],
pos: usize,
mask: usize,
length: usize,
min_fraction: super::util::floatX,
min_fraction: floatX,
) -> i32 {
let mut size_utf8: usize = 0usize;
let mut i: usize = 0usize;
is_mostly_utf8(data, pos, mask, length, min_fraction).into()
}

// NOTE(review): this hunk is a diff rendering without +/- markers. The removed lines
// (call to `BrotliParseAsUTF8` with an out-parameter, `1i32`/`0i32` result) and the
// added lines (call to `parse_as_utf8` returning a tuple, `bool` result) are
// interleaved. The comments describe the logic both versions share.
//
/// Reports whether more than `min_fraction` of the `length` bytes starting at `pos`
/// decode as valid UTF-8 sequences.
///
/// Each step decodes one sequence from `data[(pos + i) & mask..]` and advances by the
/// number of bytes it consumed. Those bytes are counted as UTF-8 only when the
/// returned symbol is below `0x110000`, i.e. not the parser's invalid-byte marker.
/// The final comparison is strict: `size_utf8 > min_fraction * length`.
///
/// NOTE(review): the masked start index suggests `data` is a ring buffer with
/// `mask = size - 1` — confirm against the callers.
pub(crate) fn is_mostly_utf8(
data: &[u8],
pos: usize,
mask: usize,
length: usize,
min_fraction: floatX,
) -> bool {
// size_utf8: bytes that belonged to valid sequences; i: bytes scanned so far.
let mut size_utf8: usize = 0;
let mut i: usize = 0;
while i < length {
let mut symbol: i32 = 0;
let bytes_read: usize = BrotliParseAsUTF8(
&mut symbol,
&data[(pos.wrapping_add(i) & mask)..],
length.wrapping_sub(i),
);
// The parser is told how many bytes remain (`length - i`). Its multi-byte branches
// require `size` to exceed the bytes they read, so `i` never passes `length`.
let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i);
i = i.wrapping_add(bytes_read);
if symbol < 0x110000i32 {
if symbol < 0x11_0000 {
size_utf8 = size_utf8.wrapping_add(bytes_read);
}
}
if size_utf8 as (super::util::floatX) > min_fraction * length as (super::util::floatX) {
1i32
} else {
0i32
}
size_utf8 as floatX > min_fraction * length as floatX
}

0 comments on commit 4e778a1

Please sign in to comment.