Skip to content

Commit

Permalink
Implement formatting arguments for strings and integers
Browse files Browse the repository at this point in the history
  • Loading branch information
alexcrichton committed Aug 13, 2013
1 parent 44675ac commit b820748
Show file tree
Hide file tree
Showing 6 changed files with 295 additions and 105 deletions.
38 changes: 38 additions & 0 deletions src/libstd/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ use unicode::{derived_property, general_category};
#[cfg(not(test))] use cmp::{Eq, Ord};
#[cfg(not(test))] use num::Zero;

// UTF-8 ranges and tags for encoding characters
//
// The `MAX_*` values are exclusive upper bounds on the code point that fits in
// a sequence of the given length; the `TAG_*` values are the marker bits OR-ed
// into the bytes of a sequence.

// Marker bits (0b1000_0000) OR-ed into every continuation byte.
static TAG_CONT: uint = 128u;
// Code points below this value are encoded as a single byte.
static MAX_ONE_B: uint = 128u;
// Marker bits (0b1100_0000) OR-ed into the first byte of a two-byte sequence.
static TAG_TWO_B: uint = 192u;
// Code points below this value (and >= MAX_ONE_B) are encoded as two bytes.
static MAX_TWO_B: uint = 2048u;
// Marker bits (0b1110_0000) OR-ed into the first byte of a three-byte sequence.
static TAG_THREE_B: uint = 224u;
// Code points below this value (and >= MAX_TWO_B) are encoded as three bytes.
static MAX_THREE_B: uint = 65536u;
// Marker bits (0b1111_0000) OR-ed into the first byte of a four-byte sequence.
static TAG_FOUR_B: uint = 240u;

/*
Lu Uppercase_Letter an uppercase letter
Ll Lowercase_Letter a lowercase letter
Expand Down Expand Up @@ -278,6 +287,12 @@ pub trait Char {
fn escape_unicode(&self, f: &fn(char));
fn escape_default(&self, f: &fn(char));
fn len_utf8_bytes(&self) -> uint;

/// Encodes this character as utf-8 into the provided byte-buffer. The
/// buffer must be at least 4 bytes long or a runtime failure will occur.
///
/// This will then return the number of bytes written to the slice.
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
}

impl Char for char {
Expand Down Expand Up @@ -308,6 +323,29 @@ impl Char for char {
fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) }

fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }

fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
    // Widen once so the range checks and the bit manipulation below all
    // operate on the same integer type.
    let cp = *self as uint;

    // Each branch stores the leading byte (length tag + high bits) followed
    // by continuation bytes carrying six payload bits each, and evaluates to
    // the number of bytes stored.
    if cp < MAX_ONE_B {
        // One byte: the code point is stored unchanged.
        dst[0] = cp as u8;
        1
    } else if cp < MAX_TWO_B {
        // Two bytes: 5 payload bits, then 6.
        dst[0] = (((cp >> 6u) & 0x1Fu) | TAG_TWO_B) as u8;
        dst[1] = ((cp & 0x3Fu) | TAG_CONT) as u8;
        2
    } else if cp < MAX_THREE_B {
        // Three bytes: 4 payload bits, then 6, then 6.
        dst[0] = (((cp >> 12u) & 0x0Fu) | TAG_THREE_B) as u8;
        dst[1] = (((cp >> 6u) & 0x3Fu) | TAG_CONT) as u8;
        dst[2] = ((cp & 0x3Fu) | TAG_CONT) as u8;
        3
    } else {
        // Four bytes: 3 payload bits, then 6, 6 and 6.
        dst[0] = (((cp >> 18u) & 0x07u) | TAG_FOUR_B) as u8;
        dst[1] = (((cp >> 12u) & 0x3Fu) | TAG_CONT) as u8;
        dst[2] = (((cp >> 6u) & 0x3Fu) | TAG_CONT) as u8;
        dst[3] = ((cp & 0x3Fu) | TAG_CONT) as u8;
        4
    }
}
}

#[cfg(not(test))]
Expand Down
167 changes: 145 additions & 22 deletions src/libstd/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
use prelude::*;

use cast;
use int;
use char::Char;
use rt::io::Decorator;
use rt::io::mem::MemWriter;
use rt::io;
Expand Down Expand Up @@ -122,6 +122,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str {
}

impl<'self> Formatter<'self> {

// First up is the collection of functions used to execute a format string
// at runtime. This consumes all of the compile-time statics generated by
// the ifmt! syntax extension.

fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) {
let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| {
match *cnt {
Expand Down Expand Up @@ -240,6 +245,118 @@ impl<'self> Formatter<'self> {
}
}
}

// Helper methods used for padding and processing formatting arguments that
// all formatting traits can use.

/// Emits an already-rendered integer to the internal buffer, applying the
/// sign, alternate-prefix and width formatting arguments.
///
/// # Arguments
///
/// * s - the rendered digits of the integer's magnitude, without any sign or
///       prefix
/// * alternate_prefix - the prefix (e.g. "0x") that is emitted in front of
///                      the digits only when the alternate flag is set
/// * positive - whether the value is non-negative; when false a leading `-`
///              is emitted
///
/// When the sign-plus flag is set, non-negative values get a leading `+`.
/// If a width is specified and the emitted text (sign + prefix + digits) is
/// shorter than it, the remainder is filled via `with_padding`.
pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str,
                    positive: bool) {
    use fmt::parse::{FlagAlternate, FlagSignPlus};

    let alternate = self.flags & 1 << (FlagAlternate as uint) != 0;
    let sign_plus = self.flags & 1 << (FlagSignPlus as uint) != 0;

    // Compute the number of bytes that `emit` below will actually write.
    let mut actual_len = s.len();
    if alternate {
        actual_len += alternate_prefix.len();
    }
    // At most one sign byte is ever emitted: `-` for negative values, or `+`
    // for non-negative values when the sign-plus flag is set. Counting the
    // flag and the negativity separately would over-count negative values
    // formatted with the sign-plus flag and leave the output one fill short.
    if !positive || sign_plus {
        actual_len += 1;
    }

    let emit = |this: &mut Formatter| {
        if !positive {
            this.buf.write(['-' as u8]);
        } else if sign_plus {
            this.buf.write(['+' as u8]);
        }
        if alternate {
            this.buf.write(alternate_prefix.as_bytes());
        }
        this.buf.write(s);
    };

    match self.width {
        // No minimum width, or already wide enough: emit as-is.
        None => { emit(self) }
        Some(min) if actual_len >= min => { emit(self) }
        // Otherwise fill the difference according to the alignment.
        Some(min) => {
            do self.with_padding(min - actual_len) |me| {
                emit(me);
            }
        }
    }
}

/// This function takes a string slice and emits it to the internal buffer
/// after applying the relevant formatting flags specified. The flags
/// recognized for generic strings are:
///
/// * width - the minimum width of what to emit
/// * fill/alignleft - what to emit and where to emit it if the string
/// provided needs to be padded
/// * precision - the maximum length to emit, the string is truncated if it
/// is longer than this length
///
/// Notably this function ignored the `flag` parameters
pub fn pad(&mut self, s: &str) {
// Make sure there's a fast path up front
if self.width.is_none() && self.precision.is_none() {
self.buf.write(s.as_bytes());
return
}
// The `precision` field can be interpreted as a `max-width` for the
// string being formatted
match self.precision {
Some(max) => {
// If there's a maximum width and our string is longer than
// that, then we must always have truncation. This is the only
// case where the maximum length will matter.
let char_len = s.char_len();
if char_len >= max {
let nchars = uint::min(max, char_len);
self.buf.write(s.slice_chars(0, nchars).as_bytes());
return
}
}
None => {}
}

// The `width` field is more of a `min-width` parameter at this point.
match self.width {
// If we're under the maximum length, and there's no minimum length
// requirements, then we can just emit the string
None => { self.buf.write(s.as_bytes()) }

// If we're under the maximum width, check if we're over the minimum
// width, if so it's as easy as just emitting the string.
Some(width) if s.char_len() >= width => {
self.buf.write(s.as_bytes())
}

// If we're under both the maximum and the minimum width, then fill
// up the minimum width with the specified string + some alignment.
Some(width) => {
do self.with_padding(width - s.len()) |me| {
me.buf.write(s.as_bytes());
}
}
}
}

/// Runs `f` to emit the content and writes `padding` copies of the fill
/// character next to it: after the content when `alignleft` is set, before
/// it otherwise.
fn with_padding(&mut self, padding: uint, f: &fn(&mut Formatter)) {
    // Left-aligned: content first, fill afterwards.
    if self.alignleft {
        f(self);
    }

    // Encode the fill character into a scratch buffer once and reuse those
    // bytes for every repetition.
    let mut encoded = [0u8, ..4];
    let nbytes = self.fill.encode_utf8(encoded);
    let mut remaining = padding;
    while remaining > 0 {
        self.buf.write(encoded.slice_to(nbytes));
        remaining -= 1;
    }

    // Right-aligned: fill first, content afterwards.
    if !self.alignleft {
        f(self);
    }
}
}

/// This is a function which calls are emitted to by the compiler itself to
Expand Down Expand Up @@ -279,60 +396,53 @@ impl Bool for bool {

impl<'self> String for &'self str {
fn fmt(s: & &'self str, f: &mut Formatter) {
// XXX: formatting args
f.buf.write(s.as_bytes())
f.pad(*s);
}
}

impl Char for char {
fn fmt(c: &char, f: &mut Formatter) {
// XXX: formatting args
// XXX: shouldn't require an allocation
let mut s = ~"";
s.push_char(*c);
f.buf.write(s.as_bytes());
let mut utf8 = [0u8, ..4];
let amt = c.encode_utf8(utf8);
let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) };
String::fmt(&s, f);
}
}

impl Signed for int {
fn fmt(c: &int, f: &mut Formatter) {
// XXX: formatting args
do int::to_str_bytes(*c, 10) |buf| {
f.buf.write(buf);
do uint::to_str_bytes(c.abs() as uint, 10) |buf| {
f.pad_integral(buf, "", *c >= 0);
}
}
}

impl Unsigned for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 10) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "", true);
}
}
}

impl Octal for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 8) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "0o", true);
}
}
}

impl LowerHex for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 16) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "0x", true);
}
}
}

impl UpperHex for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 16) |buf| {
let mut local = [0u8, ..16];
for (l, &b) in local.mut_iter().zip(buf.iter()) {
Expand All @@ -341,16 +451,29 @@ impl UpperHex for uint {
_ => b,
};
}
f.buf.write(local.slice_to(buf.len()));
f.pad_integral(local.slice_to(buf.len()), "0x", true);
}
}
}

impl<T> Poly for T {
fn fmt(t: &T, f: &mut Formatter) {
// XXX: formatting args
let s = sys::log_str(t);
f.buf.write(s.as_bytes());
match (f.width, f.precision) {
(None, None) => {
// XXX: sys::log_str should have a variant which takes a stream
// and we should directly call that (avoids unnecessary
// allocations)
let s = sys::log_str(t);
f.buf.write(s.as_bytes());
}

// If we have a specified width for formatting, then we have to make
// this allocation of a new string
_ => {
let s = sys::log_str(t);
f.pad(s);
}
}
}
}

Expand Down
53 changes: 12 additions & 41 deletions src/libstd/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use ptr;
use ptr::RawPtr;
use to_str::ToStr;
use uint;
use unstable::raw::{Repr, Slice};
use vec;
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};

Expand Down Expand Up @@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte(
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
)

// UTF-8 tags and ranges
static TAG_CONT_U8: u8 = 128u8;
static TAG_CONT: uint = 128u;
static MAX_ONE_B: uint = 128u;
static TAG_TWO_B: uint = 192u;
static MAX_TWO_B: uint = 2048u;
static TAG_THREE_B: uint = 224u;
static MAX_THREE_B: uint = 65536u;
static TAG_FOUR_B: uint = 240u;
static MAX_UNICODE: uint = 1114112u;

/// Unsafe operations
Expand Down Expand Up @@ -1988,40 +1981,18 @@ impl OwnedStr for ~str {
#[inline]
fn push_char(&mut self, c: char) {
assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
let cur_len = self.len();
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
unsafe {
let code = c as uint;
let nb = if code < MAX_ONE_B { 1u }
else if code < MAX_TWO_B { 2u }
else if code < MAX_THREE_B { 3u }
else { 4u };
let len = self.len();
let new_len = len + nb;
self.reserve_at_least(new_len);
let off = len as int;
do self.as_mut_buf |buf, _len| {
match nb {
1u => {
*ptr::mut_offset(buf, off) = code as u8;
}
2u => {
*ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
}
3u => {
*ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
}
4u => {
*ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
}
_ => {}
}
}
raw::set_len(self, new_len);
let v = self.repr();
let len = c.encode_utf8(cast::transmute(Slice {
data: ((&(*v).data) as *u8).offset(cur_len as int),
len: 4,
}));
raw::set_len(self, cur_len + len);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/libstd/unstable/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {}
impl<'self> Repr<Slice<u8>> for &'self str {}
impl<T> Repr<*Box<T>> for @T {}
impl<T> Repr<*Box<Vec<T>>> for @[T] {}
// Exposes the raw representation of an owned string (`~str`) as a `*String`
// pointer. NOTE(review): presumably obtained through the trait's `repr()`
// method, as `push_char` in str.rs does — confirm against the `Repr` trait.
impl Repr<*String> for ~str {}

// sure would be nice to have this
// impl<T> Repr<*Vec<T>> for ~[T] {}
Loading

0 comments on commit b820748

Please sign in to comment.