Merge pull request #3 from aldanor/feature/readme

aldanor · web-flow · commit 5d4b6996a087 · 2021-01-10T01:26:55.000Z
Add READMEs, drop MSRV to 1.37
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        rust: [1.47.0, stable, nightly]
+        rust: [1.37.0, stable, nightly]
     steps:
       - uses: actions/checkout@v2
         with:
diff --git a/Cargo.toml b/Cargo.toml
@@ -21,7 +21,7 @@ std = []
 lexical-core = "0.7"
 criterion = "0.3"
 
-[dev-dependencies.hexf]
+[dev-dependencies.hexf-parse]
 version = "*"
 git = "https://github.com/lifthrasiir/hexf.git" # until the version on crates.io is updated
 rev = "0c95001574997847e1348c4f6dac5f434c772914"
diff --git a/README.md b/README.md
@@ -6,7 +6,136 @@ fast-float
 [![Documentation](https://docs.rs/fast-float/badge.svg)](https://docs.rs/fast-float)
 [![Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 [![MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![Rust 1.47+](https://img.shields.io/badge/rustc-1.47+-lightgray.svg)](https://blog.rust-lang.org/2020/10/08/Rust-1.47.html)
+[![Rustc 1.37+](https://img.shields.io/badge/rustc-1.37+-lightgray.svg)](https://blog.rust-lang.org/2019/08/15/Rust-1.37.0.html)
+
+This crate provides a super-fast decimal number parser from strings into floats.
+
+```toml
+[dependencies]
+fast-float = "0.1"
+```
+
+There are no dependencies and the crate can be used in a no_std context by disabling the "std" feature.
+
+*Compiler support: rustc 1.37+.*
+
+## Usage
+
+There are two top-level functions provided:
+[`parse()`](https://docs.rs/fast-float/latest/fast_float/fn.parse.html) and 
+[`parse_partial()`](https://docs.rs/fast-float/latest/fast_float/fn.parse_partial.html), both taking
+either a string or a bytes slice and parsing the input into either `f32` or `f64`: 
+
+- `parse()` treats the whole string as a decimal number and returns an error if there are
+  invalid characters or if the string is empty.
+- `parse_partial()` tries to find the longest substring at the beginning of the given input
+  string that can be parsed as a decimal number and, in the case of success, returns the parsed
+  value along with the number of characters processed; an error is returned if the string doesn't
+  start with a decimal number or if it is empty. This function is most useful as a building
+  block when constructing more complex parsers, or when parsing streams of data.
+
+Example:
+
+```rust
+// Parse the entire string as a decimal number.
+let s = "1.23e-02";
+let x: f32 = fast_float::parse(s).unwrap();
+assert_eq!(x, 0.0123);
+
+// Parse as many characters as possible as a decimal number.
+let s = "1.23e-02foo";
+let (x, n) = fast_float::parse_partial::<f32, _>(s).unwrap();
+assert_eq!(x, 0.0123);
+assert_eq!(n, 8);
+assert_eq!(&s[n..], "foo");
+```
+
+## Details
+
+This crate is a direct port of Daniel Lemire's [`fast_float`](https://github.com/fastfloat/fast_float)
+C++ library (valuable discussions with Daniel while porting it helped shape the crate and get it to 
+the performance level it's at now), with some Rust-specific tweaks. Please see the original
+repository for many useful details regarding the algorithm and the implementation.
+
+The parser is locale-independent. The resulting value is the closest floating-point value (using either
+`f32` or `f64`), using the "round to even" convention for values that would otherwise fall right in-between
+two values. That is, we provide exact parsing according to the IEEE standard. 
+
+Infinity and NaN values can be parsed, along with scientific notation.
+
+Both little-endian and big-endian platforms are equally supported, with extra optimizations enabled
+on little-endian architectures.
+
+## Performance
+
+The presented parser seems to beat all of the existing C/C++/Rust float parsers known to us at the
+moment by a large margin, in all of the datasets we tested it on so far – see detailed benchmarks 
+below (the only exception being the original fast_float C++ library, of course – performance of
+which is within noise bounds of this crate). On modern machines, parsing throughput can reach
+up to 1GB/s.
+
+In particular, it is faster than Rust standard library's `FromStr::from_str()` by a factor of 2-8x
+(larger factor for longer float strings).
+
+While various details regarding the algorithm can be found in the repository for the original
+C++ library, here are a few brief notes:
+
+- The parser is specialized to work lightning-fast on inputs with at most 19 significant digits
+  (which constitutes the so-called "fast path"). We believe that most real-life inputs should
+  fall under this category, and we treat longer inputs as "degenerate" edge cases since they
+  inevitably cause overflows and loss of precision.
+- If the significand happens to be longer than 19 digits, the parser falls back to the "slow path",
+  in which case its performance roughly matches that of the top Rust/C++ libraries (and still
+  beats them most of the time, although not by a lot).
+- On little-endian systems, there are additional optimizations for numbers with more than 8 digits
+  after the decimal point.
+
+## Benchmarks
+
+Below is the table of average timings in nanoseconds for parsing a single number 
+into a 64-bit float.
+
+|                  | `canada` | `mesh`   | `uniform` | `iidi` | `iei`  | `rec32` |
+| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- |
+| fast-float       | 22.08    | 11.10    | 20.04     | 40.77  | 26.33  | 29.84   |
+| lexical          | 61.63    | 25.10    | 53.77     | 72.33  | 53.39  | 72.40   |
+| lexical/lossy    | 61.51    | 25.24    | 54.00     | 71.30  | 52.87  | 71.71   |
+| from_str         | 175.07   | 22.58    | 103.00    | 228.78 | 115.76 | 211.13  |
+| fast_float (C++) | 22.78    | 10.99    | 20.05     | 41.12  | 27.51  | 30.85   |
+| abseil (C++)     | 42.66    | 32.88    | 46.01     | 50.83  | 46.33  | 49.95   |
+| netlib (C++)     | 57.53    | 24.86    | 64.72     | 56.63  | 36.20  | 67.29   |
+| strtod (C)       | 286.10   | 31.15    | 258.73    | 295.73 | 205.72 | 315.95  |
+
+Parsers:
+
+- `fast-float` – this very crate
+- `lexical` – from `lexical_core` crate, v0.7
+- `lexical/lossy` – from `lexical_core` crate, v0.7 (lossy parser)
+- `from_str` – Rust standard library, `FromStr` trait
+- `fast_float (C++)` – original C++ implementation of 'fast-float' method
+- `abseil (C++)` – Abseil C++ Common Libraries
+- `netlib (C++)` – C++ Network Library
+- `strtod (C)` – C standard library
+
+Datasets:
+
+- `canada` – numbers in `canada.txt` file
+- `mesh` – numbers in `mesh.txt` file
+- `uniform` – uniform random numbers from 0 to 1
+- `iidi` – random numbers of format `%d%d.%d`
+- `iei` – random numbers of format `%de%d`
+- `rec32` – reciprocals of random 32-bit integers
+
+Notes:
+
+- Test environment: macOS 10.14.6, clang 11.0, Rust 1.49, 3.5 GHz i7-4771 Haswell.
+- The two test files referred to above can be found in
+  [this](https://github.com/lemire/simple_fastfloat_benchmark) repository.
+- The Rust part of the table (along with a few other benchmarks) can be generated via
+  the benchmark tool that can be found under `extras/simple-bench` of this repo.
+- The C/C++ part of the table (along with a few other benchmarks and parsers) can be
+  generated via a C++ utility that can be found in [this](https://github.com/lemire/simple_fastfloat_benchmark)
+  repository.
 
 <br>
 
diff --git a/extras/data-tests/Cargo.toml b/extras/data-tests/Cargo.toml
@@ -3,6 +3,8 @@ name = "fast-float-data-tests"
 version = "0.1.0"
 authors = ["Ivan Smirnov <i.s.smirnov@gmail.com>"]
 edition = "2018"
+readme = "README.md"
+license = "MIT OR Apache-2.0"
 publish = false
 
 [dependencies]
diff --git a/extras/data-tests/README.md b/extras/data-tests/README.md
@@ -0,0 +1,12 @@
+This crate allows running the tests based on files with test cases stored in a
+standardized format (credit to Daniel Lemire and Nigel Tao for the test cases).
+The test data is sourced from [this](https://github.com/lemire/fast_float_supplemental_tests) 
+repository which is used for the original fast_float C++ library tests.
+
+Test data files can be found under `ext/data`.
+
+To run the tests:
+
+```sh
+cargo run --release
+```
diff --git a/extras/simple-bench/Cargo.toml b/extras/simple-bench/Cargo.toml
@@ -3,6 +3,8 @@ name = "fast-float-simple-bench"
 version = "0.1.0"
 authors = ["Ivan Smirnov <i.s.smirnov@gmail.com>"]
 edition = "2018"
+readme = "README.md"
+license = "MIT OR Apache-2.0"
 publish = false
 
 [dependencies]
diff --git a/extras/simple-bench/README.md b/extras/simple-bench/README.md
@@ -0,0 +1,27 @@
+This crate provides a utility for benchmarking the `fast-float` crate against
+`lexical_core` and standard library's `FromStr`.
+
+To run a file-based test:
+
+```sh
+cargo run --release -- file ext/data/canada.txt
+```
+
+There are two files used in benchmarking of the original fast_float C++ library
+(canada.txt and mesh.txt); they are sourced from
+[this](https://github.com/lemire/simple_fastfloat_benchmark) repository. These
+files can be found under `ext/data`.
+
+To run a randomized test:
+
+```sh
+cargo run --release -- random uniform
+```
+
+For more details and options (choosing a different random generator, storing 
+randomized inputs to a file, changing the number of runs, or switching between 
+32-bit and 64-bit floats), refer to help:
+
+```
+cargo run --release -- --help
+```
diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs
@@ -7,7 +7,7 @@ use std::str::FromStr;
 use std::time::Instant;
 
 use fastrand::Rng;
-use lexical::FromLexical;
+use lexical::{FromLexical, FromLexicalLossy};
 use structopt::StructOpt;
 
 use fast_float::FastFloat;
@@ -87,7 +87,7 @@ fn run_one_bench<T: FastFloat, F: Fn(&str) -> T>(
     BenchResult { name, times }
 }
 
-fn run_all_benches<T: FastFloat + FromLexical + FromStr>(
+fn run_all_benches<T: FastFloat + FromLexical + FromLexicalLossy + FromStr>(
     inputs: &[String],
     repeat: usize,
 ) -> Vec<BenchResult> {
@@ -99,12 +99,19 @@ fn run_all_benches<T: FastFloat + FromLexical + FromStr>(
             .unwrap_or_default()
             .0
     };
-    let lex_res = run_one_bench("lexical_core", inputs, repeat, lex_func);
+    let lex_res = run_one_bench("lexical", inputs, repeat, lex_func);
+
+    let lexl_func = |s: &str| {
+        lexical_core::parse_partial_lossy::<T>(s.as_bytes())
+            .unwrap_or_default()
+            .0
+    };
+    let lexl_res = run_one_bench("lexical/lossy", inputs, repeat, lexl_func);
 
     let std_func = |s: &str| s.parse::<T>().unwrap_or_default();
     let std_res = run_one_bench("from_str", inputs, repeat, std_func);
 
-    vec![ff_res, lex_res, std_res]
+    vec![ff_res, lex_res, lexl_res, std_res]
 }
 
 fn print_report(inputs: &[String], results: &[BenchResult], inputs_name: &str, ty: &str) {
diff --git a/src/common.rs b/src/common.rs
@@ -99,7 +99,7 @@ impl<'a> AsciiStr<'a> {
 
     #[inline]
     pub fn offset_from(&self, other: &Self) -> isize {
-        unsafe { self.ptr.offset_from(other.ptr) } // assuming the same end
+        isize::wrapping_sub(self.ptr as _, other.ptr as _) // assuming the same end
     }
 }
 
diff --git a/src/decimal.rs b/src/decimal.rs
@@ -1,6 +1,8 @@
+use core::fmt::{self, Debug};
+
 use crate::common::{is_8digits_le, parse_digits, ByteSlice};
 
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Clone)]
 pub struct Decimal {
     pub num_digits: usize,
     pub decimal_point: i32,
@@ -9,6 +11,30 @@ pub struct Decimal {
     pub digits: [u8; Self::MAX_DIGITS],
 }
 
+impl Debug for Decimal {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Decimal")
+            .field("num_digits", &self.num_digits)
+            .field("decimal_point", &self.decimal_point)
+            .field("negative", &self.negative)
+            .field("truncated", &self.truncated)
+            .field("digits", &(&self.digits[..self.num_digits]))
+            .finish()
+    }
+}
+
+impl PartialEq for Decimal {
+    fn eq(&self, other: &Self) -> bool {
+        self.num_digits == other.num_digits
+            && self.decimal_point == other.decimal_point
+            && self.negative == other.negative
+            && self.truncated == other.truncated
+            && &self.digits[..] == &other.digits[..]
+    }
+}
+
+impl Eq for Decimal {}
+
 impl Default for Decimal {
     fn default() -> Self {
         Self {
@@ -46,7 +72,7 @@ impl Decimal {
         if self.num_digits == 0 || self.decimal_point < 0 {
             return 0;
         } else if self.decimal_point > 18 {
-            return u64::MAX;
+            return 0xFFFF_FFFF_FFFF_FFFF_u64;
         }
         let dp = self.decimal_point as usize;
         let mut n = 0_u64;
diff --git a/src/float.rs b/src/float.rs
@@ -46,10 +46,10 @@ pub trait Float:
 impl private::Sealed for f32 {}
 
 impl Float for f32 {
-    const INFINITY: Self = Self::INFINITY;
-    const NEG_INFINITY: Self = Self::NEG_INFINITY;
-    const NAN: Self = Self::NAN;
-    const NEG_NAN: Self = -Self::NAN;
+    const INFINITY: Self = core::f32::INFINITY;
+    const NEG_INFINITY: Self = core::f32::NEG_INFINITY;
+    const NAN: Self = core::f32::NAN;
+    const NEG_NAN: Self = -core::f32::NAN;
 
     const MANTISSA_EXPLICIT_BITS: usize = 23;
     const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
@@ -78,10 +78,10 @@ impl Float for f32 {
 impl private::Sealed for f64 {}
 
 impl Float for f64 {
-    const INFINITY: Self = Self::INFINITY;
-    const NEG_INFINITY: Self = Self::NEG_INFINITY;
-    const NAN: Self = Self::NAN;
-    const NEG_NAN: Self = -Self::NAN;
+    const INFINITY: Self = core::f64::INFINITY;
+    const NEG_INFINITY: Self = core::f64::NEG_INFINITY;
+    const NAN: Self = core::f64::NAN;
+    const NEG_NAN: Self = -core::f64::NAN;
 
     const MANTISSA_EXPLICIT_BITS: usize = 52;
     const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
diff --git a/src/lib.rs b/src/lib.rs
diff --git a/tests/test_basic.rs b/tests/test_basic.rs

Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@ impl<'a> AsciiStr<'a> {`
`99`	`99`
`100`	`100`	`#[inline]`
`101`	`101`	`pub fn offset_from(&self, other: &Self) -> isize {`
`102`		`- unsafe { self.ptr.offset_from(other.ptr) } // assuming the same end`
	`102`	`+ isize::wrapping_sub(self.ptr as _, other.ptr as _) // assuming the same end`
`103`	`103`	`}`
`104`	`104`	`}`
`105`	`105`