From edd0e94994c98080c0fe88bd90b67a385482d9b6 Mon Sep 17 00:00:00 2001
From: Ryan Cumming <etaoins@gmail.com>
Date: Sun, 5 Jan 2020 10:16:35 +1100
Subject: [PATCH] Make `FileId` non-zero

When porting from `codespan` 0.3 Arret I needed to track the `FileId`
separately from the `Span` now that the file isn't implied in the span
itself. Having a simple struct like this works for 90% of the cases:

```rust
struct Span {
  file_id: codespan::FileId,
  codespan_span: codespan::Span,
}
```

However, this requires that every time we parse any source we require a
`FileId` which requires a `codespan::Files` to create. This is overkill
in a couple of cases:

1. Unit tests that just need to parse simple Arret strings as a
   shorthand for building our AST

2. Syntax highlighting in the REPL

The compromise in etaoins/arret#160 is to use `Option<FileId>`. I'm not
100% happy with that as the ultimate solution but it was the easiest
path to take during porting. However, this makes the size of the above
struct 13 bytes which will typically be rounded up to 16 bytes for
alignment and padding.

We can use `std::num::NonZeroU32` instead and offset all of our indexes
by 1. This allows the Rust compiler to use 0 to represent the `None`
case and bring the size down to 12 bytes. Because the struct only needs
an alignment of 4 bytes this is more likely to stay as 12 bytes in most
contexts.

A nicer solution would be if Rust had e.g. `NonMaxU32` but it appears
the Rust team instead wants to use const generics so arbitrary niche
values can be specified.
---
 codespan/src/file.rs | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/codespan/src/file.rs b/codespan/src/file.rs
index a519fcb5..3576da4c 100644
--- a/codespan/src/file.rs
+++ b/codespan/src/file.rs
@@ -1,5 +1,6 @@
 #[cfg(feature = "serialization")]
 use serde::{Deserialize, Serialize};
+use std::num::NonZeroU32;
 use std::{error, fmt};
 
 use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
@@ -67,7 +68,23 @@ impl fmt::Display for SpanOutOfBoundsError {
 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
 #[cfg_attr(feature = "memory_usage", derive(heapsize_derive::HeapSizeOf))]
-pub struct FileId(u32);
+pub struct FileId(NonZeroU32);
+
+impl FileId {
+    /// Offset of our `FileId`'s numeric value to an index on `Files::files`.
+    ///
+    /// This is to ensure the first `FileId` is non-zero for memory layout optimisations (e.g.
+    /// `Option<FileId>` is 4 bytes)
+    const OFFSET: u32 = 1;
+
+    fn new(index: usize) -> FileId {
+        FileId(NonZeroU32::new(index as u32 + Self::OFFSET).unwrap())
+    }
+
+    fn get(self) -> usize {
+        (self.0.get() - Self::OFFSET) as usize
+    }
+}
 
 /// A database of source files.
 #[derive(Clone, Debug, Default)]
@@ -84,7 +101,7 @@ impl Files {
     /// Add a file to the database, returning the handle that can be used to
     /// refer to it again.
     pub fn add(&mut self, name: impl Into<String>, source: impl Into<String>) -> FileId {
-        let file_id = FileId(self.files.len() as u32);
+        let file_id = FileId::new(self.files.len());
         self.files.push(File::new(name.into(), source.into()));
         file_id
     }
@@ -100,13 +117,13 @@ impl Files {
     /// Get a the source file using the file id.
     // FIXME: return an option or result?
     fn get(&self, file_id: FileId) -> &File {
-        &self.files[file_id.0 as usize]
+        &self.files[file_id.get()]
     }
 
     /// Get a the source file using the file id.
     // FIXME: return an option or result?
     fn get_mut(&mut self, file_id: FileId) -> &mut File {
-        &mut self.files[file_id.0 as usize]
+        &mut self.files[file_id.get()]
     }
 
     /// Get the name of the source file.