std: Redesign c_str and c_vec

This commit is an implementation of [RFC 494][rfc] which removes the entire `std::c_vec` module and redesigns the `std::c_str` module as `std::ffi`. [rfc]: https://github.com/rust-lang/rfcs/blob/master/text/0494-c_str-and-c_vec-stability.md The interface of the new `CString` is outlined in the linked RFC, the primary changes being: * The `ToCStr` trait is gone, meaning the `with_c_str` and `to_c_str` methods are now gone. These two methods are replaced with a `CString::from_slice` method. * The `CString` type is now just a wrapper around `Vec<u8>` with a static guarantee that there is a trailing nul byte with no internal nul bytes. This means that `CString` now implements `Deref<Target = [c_char]>`, which is where it gains most of its methods from. A few helper methods are added to acquire a slice of `u8` instead of `c_char`, as well as including a slice with the trailing nul byte if necessary. * All usage of non-owned `CString` values is now done via two functions inside of `std::ffi`, called `c_str_to_bytes` and `c_str_to_bytes_with_nul`. These functions are now the one method used to convert a `*const c_char` to a Rust slice of `u8`. Many more details, including newly deprecated methods, can be found linked in the RFC. This is a: [breaking-change] Closes #20444
rust-lang · Jan 5, 2015 · ec7a50d · ec7a50d
1 parent 1f732ef
commit ec7a50d
Show file tree

Hide file tree

Showing 59 changed files with 1,023 additions and 2,003 deletions.
diff --git a/src/doc/guide-ffi.md b/src/doc/guide-ffi.md
@@ -451,7 +451,7 @@ them.
 ~~~no_run
 extern crate libc;
 
-use std::c_str::ToCStr;
+use std::ffi::CString;
 use std::ptr;
 
 #[link(name = "readline")]
@@ -460,11 +460,10 @@ extern {
 }
 
 fn main() {
-    "[my-awesome-shell] $".with_c_str(|buf| {
-        unsafe { rl_prompt = buf; }
-        // get a line, process it
-        unsafe { rl_prompt = ptr::null(); }
-    });
+    let prompt = CString::from_slice(b"[my-awesome-shell] $");
+    unsafe { rl_prompt = prompt.as_ptr(); }
+    // get a line, process it
+    unsafe { rl_prompt = ptr::null(); }
 }
 ~~~
 
@@ -509,23 +508,28 @@ to define a block for all windows systems, not just x86 ones.
 
 # Interoperability with foreign code
 
-Rust guarantees that the layout of a `struct` is compatible with the platform's representation in C
-only if the `#[repr(C)]` attribute is applied to it.  `#[repr(C, packed)]` can be used to lay out
-struct members without padding.  `#[repr(C)]` can also be applied to an enum.
-
-Rust's owned boxes (`Box<T>`) use non-nullable pointers as handles which point to the contained
-object. However, they should not be manually created because they are managed by internal
-allocators. References can safely be assumed to be non-nullable pointers directly to the type.
-However, breaking the borrow checking or mutability rules is not guaranteed to be safe, so prefer
-using raw pointers (`*`) if that's needed because the compiler can't make as many assumptions about
-them.
-
-Vectors and strings share the same basic memory layout, and utilities are available in the `vec` and
-`str` modules for working with C APIs. However, strings are not terminated with `\0`. If you need a
-NUL-terminated string for interoperability with C, you should use the `c_str::to_c_str` function.
-
-The standard library includes type aliases and function definitions for the C standard library in
-the `libc` module, and Rust links against `libc` and `libm` by default.
+Rust guarantees that the layout of a `struct` is compatible with the platform's
+representation in C only if the `#[repr(C)]` attribute is applied to it.
+`#[repr(C, packed)]` can be used to lay out struct members without padding.
+`#[repr(C)]` can also be applied to an enum.
+
+Rust's owned boxes (`Box<T>`) use non-nullable pointers as handles which point
+to the contained object. However, they should not be manually created because
+they are managed by internal allocators. References can safely be assumed to be
+non-nullable pointers directly to the type.  However, breaking the borrow
+checking or mutability rules is not guaranteed to be safe, so prefer using raw
+pointers (`*`) if that's needed because the compiler can't make as many
+assumptions about them.
+
+Vectors and strings share the same basic memory layout, and utilities are
+available in the `vec` and `str` modules for working with C APIs. However,
+strings are not terminated with `\0`. If you need a NUL-terminated string for
+interoperability with C, you should use the `CString` type in the `std::ffi`
+module.
+
+The standard library includes type aliases and function definitions for the C
+standard library in the `libc` module, and Rust links against `libc` and `libm`
+by default.
 
 # The "nullable pointer optimization"
 

diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
@@ -320,30 +320,6 @@ impl String {
         }
     }
 
-    /// Creates a `String` from a null-terminated `*const u8` buffer.
-    ///
-    /// This function is unsafe because we dereference memory until we find the
-    /// NUL character, which is not guaranteed to be present. Additionally, the
-    /// slice is not checked to see whether it contains valid UTF-8
-    #[unstable = "just renamed from `mod raw`"]
-    pub unsafe fn from_raw_buf(buf: *const u8) -> String {
-        String::from_str(str::from_c_str(buf as *const i8))
-    }
-
-    /// Creates a `String` from a `*const u8` buffer of the given length.
-    ///
-    /// This function is unsafe because it blindly assumes the validity of the
-    /// pointer `buf` for `len` bytes of memory. This function will copy the
-    /// memory from `buf` into a new allocation (owned by the returned
-    /// `String`).
-    ///
-    /// This function is also unsafe because it does not validate that the
-    /// buffer is valid UTF-8 encoded data.
-    #[unstable = "just renamed from `mod raw`"]
-    pub unsafe fn from_raw_buf_len(buf: *const u8, len: uint) -> String {
-        String::from_utf8_unchecked(Vec::from_raw_buf(buf, len))
-    }
-
     /// Converts a vector of bytes to a new `String` without checking if
     /// it contains valid UTF-8. This is unsafe because it assumes that
     /// the UTF-8-ness of the vector has already been validated.
@@ -1126,24 +1102,6 @@ mod tests {
                    String::from_str("\u{FFFD}𐒋\u{FFFD}"));
     }
 
-    #[test]
-    fn test_from_buf_len() {
-        unsafe {
-            let a = vec![65u8, 65, 65, 65, 65, 65, 65, 0];
-            assert_eq!(String::from_raw_buf_len(a.as_ptr(), 3), String::from_str("AAA"));
-        }
-    }
-
-    #[test]
-    fn test_from_buf() {
-        unsafe {
-            let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
-            let b = a.as_ptr();
-            let c = String::from_raw_buf(b);
-            assert_eq!(c, String::from_str("AAAAAAA"));
-        }
-    }
-
     #[test]
     fn test_push_bytes() {
         let mut s = String::from_str("ABC");

diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
@@ -190,7 +190,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 /// # Panics
 ///
 /// This function will panic if the string pointed to by `s` is not valid UTF-8.
-#[unstable = "may change location based on the outcome of the c_str module"]
+#[deprecated = "use std::ffi::c_str_to_bytes + str::from_utf8"]
 pub unsafe fn from_c_str(s: *const i8) -> &'static str {
     let s = s as *const u8;
     let mut len = 0u;

diff --git a/src/libflate/lib.rs b/src/libflate/lib.rs
@@ -21,15 +21,34 @@
 #![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
        html_favicon_url = "http://www.rust-lang.org/favicon.ico",
        html_root_url = "http://doc.rust-lang.org/nightly/")]
-#![feature(phase, unboxed_closures)]
+#![feature(phase, unboxed_closures, associated_types)]
 
 #[cfg(test)] #[phase(plugin, link)] extern crate log;
 
 extern crate libc;
 
 use libc::{c_void, size_t, c_int};
-use std::c_vec::CVec;
+use std::ops::Deref;
 use std::ptr::Unique;
+use std::slice;
+
+pub struct Bytes {
+    ptr: Unique<u8>,
+    len: uint,
+}
+
+impl Deref for Bytes {
+    type Target = [u8];
+    fn deref(&self) -> &[u8] {
+        unsafe { slice::from_raw_mut_buf(&self.ptr.0, self.len) }
+    }
+}
+
+impl Drop for Bytes {
+    fn drop(&mut self) {
+        unsafe { libc::free(self.ptr.0 as *mut _); }
+    }
+}
 
 #[link(name = "miniz", kind = "static")]
 extern {
@@ -52,55 +71,55 @@ static LZ_NORM : c_int = 0x80;  // LZ with 128 probes, "normal"
 static TINFL_FLAG_PARSE_ZLIB_HEADER : c_int = 0x1; // parse zlib header and adler32 checksum
 static TDEFL_WRITE_ZLIB_HEADER : c_int = 0x01000; // write zlib header and adler32 checksum
 
-fn deflate_bytes_internal(bytes: &[u8], flags: c_int) -> Option<CVec<u8>> {
+fn deflate_bytes_internal(bytes: &[u8], flags: c_int) -> Option<Bytes> {
     unsafe {
         let mut outsz : size_t = 0;
         let res = tdefl_compress_mem_to_heap(bytes.as_ptr() as *const _,
                                              bytes.len() as size_t,
                                              &mut outsz,
                                              flags);
         if !res.is_null() {
-            let res = Unique(res);
-            Some(CVec::new_with_dtor(res.0 as *mut u8, outsz as uint, move|:| libc::free(res.0)))
+            let res = Unique(res as *mut u8);
+            Some(Bytes { ptr: res, len: outsz as uint })
         } else {
             None
         }
     }
 }
 
 /// Compress a buffer, without writing any sort of header on the output.
-pub fn deflate_bytes(bytes: &[u8]) -> Option<CVec<u8>> {
+pub fn deflate_bytes(bytes: &[u8]) -> Option<Bytes> {
     deflate_bytes_internal(bytes, LZ_NORM)
 }
 
 /// Compress a buffer, using a header that zlib can understand.
-pub fn deflate_bytes_zlib(bytes: &[u8]) -> Option<CVec<u8>> {
+pub fn deflate_bytes_zlib(bytes: &[u8]) -> Option<Bytes> {
     deflate_bytes_internal(bytes, LZ_NORM | TDEFL_WRITE_ZLIB_HEADER)
 }
 
-fn inflate_bytes_internal(bytes: &[u8], flags: c_int) -> Option<CVec<u8>> {
+fn inflate_bytes_internal(bytes: &[u8], flags: c_int) -> Option<Bytes> {
     unsafe {
         let mut outsz : size_t = 0;
         let res = tinfl_decompress_mem_to_heap(bytes.as_ptr() as *const _,
                                                bytes.len() as size_t,
                                                &mut outsz,
                                                flags);
         if !res.is_null() {
-            let res = Unique(res);
-            Some(CVec::new_with_dtor(res.0 as *mut u8, outsz as uint, move|:| libc::free(res.0)))
+            let res = Unique(res as *mut u8);
+            Some(Bytes { ptr: res, len: outsz as uint })
         } else {
             None
         }
     }
 }
 
 /// Decompress a buffer, without parsing any sort of header on the input.
-pub fn inflate_bytes(bytes: &[u8]) -> Option<CVec<u8>> {
+pub fn inflate_bytes(bytes: &[u8]) -> Option<Bytes> {
     inflate_bytes_internal(bytes, 0)
 }
 
 /// Decompress a buffer that starts with a zlib header.
-pub fn inflate_bytes_zlib(bytes: &[u8]) -> Option<CVec<u8>> {
+pub fn inflate_bytes_zlib(bytes: &[u8]) -> Option<Bytes> {
     inflate_bytes_internal(bytes, TINFL_FLAG_PARSE_ZLIB_HEADER)
 }
 

diff --git a/src/librustc/metadata/cstore.rs b/src/librustc/metadata/cstore.rs
@@ -23,8 +23,8 @@ use metadata::loader;
 use util::nodemap::{FnvHashMap, NodeMap};
 
 use std::cell::RefCell;
-use std::c_vec::CVec;
 use std::rc::Rc;
+use flate::Bytes;
 use syntax::ast;
 use syntax::codemap::Span;
 use syntax::parse::token::IdentInterner;
@@ -36,7 +36,7 @@ use syntax::parse::token::IdentInterner;
 pub type cnum_map = FnvHashMap<ast::CrateNum, ast::CrateNum>;
 
 pub enum MetadataBlob {
-    MetadataVec(CVec<u8>),
+    MetadataVec(Bytes),
     MetadataArchive(loader::ArchiveMetadata),
 }
 

diff --git a/src/librustc/metadata/loader.rs b/src/librustc/metadata/loader.rs
@@ -226,7 +226,7 @@ use syntax::codemap::Span;
 use syntax::diagnostic::SpanHandler;
 use util::fs;
 
-use std::c_str::ToCStr;
+use std::ffi::CString;
 use std::cmp;
 use std::collections::{HashMap, HashSet};
 use std::io::fs::PathExtensions;
@@ -720,9 +720,8 @@ fn get_metadata_section_imp(is_osx: bool, filename: &Path) -> Result<MetadataBlo
         }
     }
     unsafe {
-        let mb = filename.with_c_str(|buf| {
-            llvm::LLVMRustCreateMemoryBufferWithContentsOfFile(buf)
-        });
+        let buf = CString::from_slice(filename.as_vec());
+        let mb = llvm::LLVMRustCreateMemoryBufferWithContentsOfFile(buf.as_ptr());
         if mb as int == 0 {
             return Err(format!("error reading library: '{}'",
                                filename.display()))
@@ -738,8 +737,9 @@ fn get_metadata_section_imp(is_osx: bool, filename: &Path) -> Result<MetadataBlo
         while llvm::LLVMIsSectionIteratorAtEnd(of.llof, si.llsi) == False {
             let mut name_buf = ptr::null();
             let name_len = llvm::LLVMRustGetSectionName(si.llsi, &mut name_buf);
-            let name = String::from_raw_buf_len(name_buf as *const u8,
-                                                name_len as uint);
+            let name = slice::from_raw_buf(&(name_buf as *const u8),
+                                           name_len as uint).to_vec();
+            let name = String::from_utf8(name).unwrap();
             debug!("get_metadata_section: name {}", name);
             if read_meta_section_name(is_osx) == name {
                 let cbuf = llvm::LLVMGetSectionContents(si.llsi);

diff --git a/src/librustc_llvm/archive_ro.rs b/src/librustc_llvm/archive_ro.rs
@@ -13,7 +13,7 @@
 use libc;
 use ArchiveRef;
 
-use std::c_str::ToCStr;
+use std::ffi::CString;
 use std::mem;
 use std::raw;
 
@@ -30,9 +30,8 @@ impl ArchiveRO {
     /// raised.
     pub fn open(dst: &Path) -> Option<ArchiveRO> {
         unsafe {
-            let ar = dst.with_c_str(|dst| {
-                ::LLVMRustOpenArchive(dst)
-            });
+            let s = CString::from_slice(dst.as_vec());
+            let ar = ::LLVMRustOpenArchive(s.as_ptr());
             if ar.is_null() {
                 None
             } else {
@@ -45,9 +44,9 @@ impl ArchiveRO {
     pub fn read<'a>(&'a self, file: &str) -> Option<&'a [u8]> {
         unsafe {
             let mut size = 0 as libc::size_t;
-            let ptr = file.with_c_str(|file| {
-                ::LLVMRustArchiveReadSection(self.ptr, file, &mut size)
-            });
+            let file = CString::from_slice(file.as_bytes());
+            let ptr = ::LLVMRustArchiveReadSection(self.ptr, file.as_ptr(),
+                                                   &mut size);
             if ptr.is_null() {
                 None
             } else {

diff --git a/src/librustc_llvm/lib.rs b/src/librustc_llvm/lib.rs
@@ -47,7 +47,7 @@ pub use self::Visibility::*;
 pub use self::DiagnosticSeverity::*;
 pub use self::Linkage::*;
 
-use std::c_str::ToCStr;
+use std::ffi::CString;
 use std::cell::RefCell;
 use std::{raw, mem};
 use libc::{c_uint, c_ushort, uint64_t, c_int, size_t, c_char};
@@ -2114,10 +2114,9 @@ impl Drop for TargetData {
 }
 
 pub fn mk_target_data(string_rep: &str) -> TargetData {
+    let string_rep = CString::from_slice(string_rep.as_bytes());
     TargetData {
-        lltd: string_rep.with_c_str(|buf| {
-            unsafe { LLVMCreateTargetData(buf) }
-        })
+        lltd: unsafe { LLVMCreateTargetData(string_rep.as_ptr()) }
     }
 }
 

diff --git a/src/librustc_trans/back/lto.rs b/src/librustc_trans/back/lto.rs
@@ -20,7 +20,7 @@ use rustc::util::common::time;
 use libc;
 use flate;
 
-use std::c_str::ToCStr;
+use std::ffi::CString;
 use std::iter;
 use std::mem;
 use std::num::Int;
@@ -139,9 +139,10 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
     }
 
     // Internalize everything but the reachable symbols of the current module
-    let cstrs: Vec<::std::c_str::CString> =
-        reachable.iter().map(|s| s.to_c_str()).collect();
-    let arr: Vec<*const libc::c_char> = cstrs.iter().map(|c| c.as_ptr()).collect();
+    let cstrs: Vec<CString> = reachable.iter().map(|s| {
+        CString::from_slice(s.as_bytes())
+    }).collect();
+    let arr: Vec<*const i8> = cstrs.iter().map(|c| c.as_ptr()).collect();
     let ptr = arr.as_ptr();
     unsafe {
         llvm::LLVMRustRunRestrictionPass(llmod,
@@ -164,15 +165,15 @@ pub fn run(sess: &session::Session, llmod: ModuleRef,
     unsafe {
         let pm = llvm::LLVMCreatePassManager();
         llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
-        "verify".with_c_str(|s| llvm::LLVMRustAddPass(pm, s));
+        llvm::LLVMRustAddPass(pm, "verify\0".as_ptr() as *const _);
 
         let builder = llvm::LLVMPassManagerBuilderCreate();
         llvm::LLVMPassManagerBuilderPopulateLTOPassManager(builder, pm,
             /* Internalize = */ False,
             /* RunInliner = */ True);
         llvm::LLVMPassManagerBuilderDispose(builder);
 
-        "verify".with_c_str(|s| llvm::LLVMRustAddPass(pm, s));
+        llvm::LLVMRustAddPass(pm, "verify\0".as_ptr() as *const _);
 
         time(sess.time_passes(), "LTO passes", (), |()|
              llvm::LLVMRunPassManager(pm, llmod));