• R/O
  • HTTP
  • SSH
  • HTTPS

コミット

タグ
未設定

よく使われているワード(クリックで追加)

java c++ android linux c# windows objective-c cocoa 誰得 qt python php ruby game gui bathyscaphe c 計画中(planning stage) 翻訳 omegat framework twitter dom test vb.net directx ゲームエンジン btron arduino previewer

blake3パッケージ


コミットメタ情報

リビジョン865986014e705939431a95c85764b8143084a505 (tree)
日時2020-05-26 22:32:20
作者dyknon <dyknon@user...>
コミッターdyknon

ログメッセージ

Update upstream source from tag 'upstream/0.3.4'

Update to upstream version '0.3.4'
with Debian dir 240c93896830021d21ccc4d9ad9ead1c629cd761

変更サマリ

差分

--- /dev/null
+++ b/.github/workflows/build_b3sum.py
@@ -0,0 +1,37 @@
1+#! /usr/bin/env python3
2+
3+from pathlib import Path
4+import platform
5+import shutil
6+import subprocess
7+import sys
8+
9+ROOT = Path(__file__).parent.parent.parent
10+RUST_TARGET = sys.argv[1]
11+
12+subprocess.run(["cargo", "build", "--target", sys.argv[1], "--release"],
13+ cwd=ROOT / "b3sum")
14+
15+if platform.system() == "Windows":
16+ original_exe_name = "b3sum.exe"
17+else:
18+ original_exe_name = "b3sum"
19+
20+if platform.system() == "Windows":
21+ new_exe_name = "b3sum_windows_x64_bin.exe"
22+elif platform.system() == "Darwin":
23+ new_exe_name = "b3sum_macos_x64_bin"
24+elif platform.system() == "Linux":
25+ new_exe_name = "b3sum_linux_x64_bin"
26+else:
27+ raise RuntimeError("Unexpected platform: " + platform.system())
28+
29+# Copy the built binary so that it has the upload name we want.
30+out_dir = ROOT / "b3sum/target" / RUST_TARGET / "release"
31+original_exe_path = str(out_dir / original_exe_name)
32+new_exe_path = str(out_dir / new_exe_name)
33+print("copying", repr(original_exe_path), "to", repr(new_exe_path))
34+shutil.copyfile(original_exe_path, new_exe_path)
35+
36+# This lets the subsequent upload step get the filepath.
37+print("::set-output name=bin_path::" + new_exe_path)
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,10 +1,16 @@
11 name: tests
22
3-on: [push, pull_request]
3+on:
4+ push:
5+ branches:
6+ - "*"
7+ # not on tags
8+ pull_request:
49
510 env:
611 BLAKE3_CI: "1"
712 RUSTFLAGS: "-D warnings"
13+ RUST_BACKTRACE: "1"
814
915 jobs:
1016 cargo_tests:
@@ -28,8 +34,14 @@ jobs:
2834 toolchain: ${{ format('{0}-{1}', matrix.channel, matrix.target.toolchain) }}
2935 profile: minimal
3036 override: true
37+ # Print the compiler version, for debugging.
38+ - name: print compiler version
39+ run: cargo run --quiet
40+ working-directory: ./tools/compiler_version
3141 # Print out instruction set support, for debugging.
32- - run: cargo run --quiet --bin instruction_set_support
42+ - name: print instruction set support
43+ run: cargo run --quiet
44+ working-directory: ./tools/instruction_set_support
3345 # Default tests plus Rayon.
3446 - run: cargo test --features=rayon
3547 # no_std tests.
--- /dev/null
+++ b/.github/workflows/tag.yml
@@ -0,0 +1,45 @@
1+name: publish_b3sum_binaries
2+
3+on:
4+ push:
5+ tags:
6+ - "*"
7+
8+env:
9+ BLAKE3_CI: "1"
10+ RUSTFLAGS: "-D warnings"
11+
12+jobs:
13+ cargo_tests:
14+ name: ${{ matrix.target.name }}
15+ runs-on: ${{ matrix.target.os }}
16+ strategy:
17+ fail-fast: false
18+ matrix:
19+ target: [
20+ { "os": "ubuntu-latest", "rust-target": "x86_64-unknown-linux-musl", "name": "Linux" },
21+ { "os": "macOS-latest", "rust-target": "x86_64-apple-darwin", "name": "macOS" },
22+ { "os": "windows-latest", "rust-target": "x86_64-pc-windows-msvc", "name": "Windows" },
23+ ]
24+
25+ steps:
26+ - uses: actions/checkout@v1
27+ - uses: actions/setup-python@v1
28+ with:
29+ python-version: "3.x"
30+ - run: pip install PyGithub
31+ - run: sudo apt-get install musl-tools
32+ if: matrix.target.os == 'ubuntu-latest'
33+ - uses: actions-rs/toolchain@v1
34+ with:
35+ toolchain: stable
36+ profile: minimal
37+ - run: rustup target add ${{ matrix.target.rust-target }}
38+ - name: build b3sum
39+ id: build_b3sum
40+ run: python -u .github/workflows/build_b3sum.py ${{ matrix.target.rust-target }}
41+ - name: upload release asset
42+ env:
43+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44+ GITHUB_TAG: ${{ github.ref }}
45+ run: python -u .github/workflows/upload_github_release_asset.py ${{ steps.build_b3sum.outputs.bin_path }}
--- /dev/null
+++ b/.github/workflows/upload_github_release_asset.py
@@ -0,0 +1,65 @@
1+#! /usr/bin/env python3
2+
3+import github
4+import os
5+import sys
6+
7+RETRIES = 10
8+
9+g = github.Github(os.environ["GITHUB_TOKEN"])
10+tag_name = os.environ["GITHUB_TAG"]
11+tag_prefix = "refs/tags/"
12+if tag_name.startswith(tag_prefix):
13+ tag_name = tag_name[len(tag_prefix):]
14+assert len(sys.argv) == 2
15+asset_path = sys.argv[1]
16+asset_name = os.path.basename(asset_path)
17+
18+repo = g.get_repo(os.environ["GITHUB_REPOSITORY"])
19+
20+tags = list(repo.get_tags())
21+
22+for tag in tags:
23+ if tag.name == tag_name:
24+ break
25+else:
26+ raise RuntimeError("no tag named " + repr(tag_name))
27+
28+try:
29+ print("Creating GitHub release for tag " + repr(tag_name) + "...")
30+ repo.create_git_release(tag_name, tag_name, tag.commit.commit.message)
31+except github.GithubException as github_error:
32+ if github_error.data["errors"][0]["code"] == "already_exists":
33+ print("Release for tag " + repr(tag_name) + " already exists.")
34+ else:
35+ raise
36+
37+releases = list(repo.get_releases())
38+for release in releases:
39+ if release.tag_name == tag_name:
40+ break
41+else:
42+ raise RuntimeError("no release for tag " + repr(tag_name))
43+
44+print("Uploading " + repr(asset_path) + "...")
45+for i in range(RETRIES):
46+ try:
47+ print("Upload attempt #{} of {}...".format(i + 1, RETRIES))
48+ release.upload_asset(asset_path)
49+ break
50+ except github.GithubException as github_error:
51+ # Unfortunately the asset upload API is flaky. Even worse, it often
52+ # partially succeeds, returning an error to the caller but leaving the
53+ # release in a state where subsequent uploads of the same asset will
54+ # fail with an "already_exists" error. (Though the asset is not visible
55+ # on github.com, so we can't just declare victory and move on.) If we
56+ # detect this case, explicitly delete the asset and continue retrying.
57+ print(github_error)
58+ for asset in release.get_assets():
59+ if asset.name == asset_name:
60+ print("Found uploaded asset after failure. Deleting...")
61+ asset.delete_asset()
62+else:
63+ raise RuntimeError("All upload attempts failed.")
64+
65+print("Success!")
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
11 [package]
22 name = "blake3"
3-version = "0.3.1"
3+version = "0.3.4"
44 authors = ["Jack O'Connor <oconnor663@gmail.com>"]
55 description = "the BLAKE3 hash function"
66 repository = "https://github.com/BLAKE3-team/BLAKE3"
@@ -86,4 +86,4 @@ rand_chacha = "0.2.1"
8686 reference_impl = { path = "./reference_impl" }
8787
8888 [build-dependencies]
89-cc = "1.0.48"
89+cc = "1.0.4"
--- a/README.md
+++ b/README.md
@@ -55,6 +55,11 @@ This repository is the official implementation of BLAKE3. It includes:
5555 port that doesn't need multi-threading or SIMD optimizations, start
5656 here.
5757
58+* A [set of test
59+ vectors](https://github.com/BLAKE3-team/BLAKE3/blob/master/test_vectors/test_vectors.json)
60+ that covers extended outputs, all three modes, and a variety of input
61+ lengths.
62+
5863 * [![Actions Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions)
5964
6065 BLAKE3 was designed by:
@@ -76,11 +81,14 @@ we recommend [Argon2](https://github.com/P-H-C/phc-winner-argon2).*
7681
7782 ### The `b3sum` utility
7883
79-The `b3sum` utility allows you to process files and data from standard
80-input using BLAKE3 in any of its three modes.
81-To use `b3sum` on the command line, [install Rust and
84+The `b3sum` command line utility prints the BLAKE3 hashes of files or of
85+standard input. Prebuilt binaries are available for Linux, Windows, and
86+macOS (requiring the [unidentified developer
87+workaround](https://support.apple.com/guide/mac-help/open-a-mac-app-from-an-unidentified-developer-mh40616/mac))
88+on the [releases page](https://github.com/BLAKE3-team/BLAKE3/releases).
89+If you've [installed Rust and
8290 Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html),
83-and then run:
91+you can also build `b3sum` yourself with:
8492
8593 ```bash
8694 cargo install b3sum
@@ -89,7 +97,7 @@ cargo install b3sum
8997 If `rustup` didn't configure your `PATH` for you, you might need to go
9098 looking for the installed binary in e.g. `~/.cargo/bin`. You can test
9199 out how fast BLAKE3 is on your machine by creating a big file and
92-hashing it, for example as follows:
100+hashing it, for example:
93101
94102 ```bash
95103 # Create a 1 GB file.
--- a/b3sum/Cargo.toml
+++ b/b3sum/Cargo.toml
@@ -1,6 +1,6 @@
11 [package]
22 name = "b3sum"
3-version = "0.3.1"
3+version = "0.3.4"
44 authors = ["Jack O'Connor <oconnor663@gmail.com>"]
55 description = "a command line implementation of the BLAKE3 hash function"
66 repository = "https://github.com/BLAKE3-team/BLAKE3"
@@ -16,12 +16,12 @@ pure = ["blake3/pure"]
1616 [dependencies]
1717 anyhow = "1.0.25"
1818 blake3 = { version = "0.3", path = "..", features = ["rayon"] }
19-clap = { version = "2.33.0", default-features = false }
19+clap = "2.33.1"
2020 hex = "0.4.0"
2121 memmap = "0.7.0"
2222 rayon = "1.2.1"
23+wild = "2.0.3"
2324
2425 [dev-dependencies]
25-assert_cmd = "0.12.0"
2626 duct = "0.13.3"
2727 tempfile = "3.1.0"
--- a/b3sum/README.md
+++ b/b3sum/README.md
@@ -5,17 +5,21 @@ A command line utility for calculating
55 Coreutils tools like `b2sum` or `md5sum`.
66
77 ```
8-b3sum 0.3.1
8+b3sum 0.3.4
99
1010 USAGE:
1111 b3sum [FLAGS] [OPTIONS] [file]...
1212
1313 FLAGS:
14+ -c, --check Reads BLAKE3 sums from the [file]s and checks them
1415 -h, --help Prints help information
1516 --keyed Uses the keyed mode. The secret key is read from standard
1617 input, and it must be exactly 32 raw bytes.
17- --no-mmap Disables memory mapping
18+ --no-mmap Disables memory mapping. Currently this also disables
19+ multithreading.
1820 --no-names Omits filenames in the output
21+ --quiet Skips printing OK for each successfully verified file.
22+ Must be used with --check.
1923 --raw Writes raw output bytes to stdout, rather than hex.
2024 --no-names is implied. In this case, only a single
2125 input is allowed.
@@ -32,9 +36,13 @@ OPTIONS:
3236 RAYON_NUM_THREADS is also respected.
3337
3438 ARGS:
35- <file>...
39+ <file>... Files to hash, or checkfiles to check. When no file is given,
40+ or when - is given, read standard input.
3641 ```
3742
43+See also [this document about how the `--check` flag
44+works](./what_does_check_do.md).
45+
3846 # Example
3947
4048 Hash the file `foo.txt`:
--- a/b3sum/src/main.rs
+++ b/b3sum/src/main.rs
@@ -1,10 +1,16 @@
1-use anyhow::{bail, Context, Result};
1+use anyhow::{bail, ensure, Context, Result};
22 use clap::{App, Arg};
33 use std::cmp;
44 use std::convert::TryInto;
55 use std::fs::File;
66 use std::io;
77 use std::io::prelude::*;
8+use std::path::{Path, PathBuf};
9+
10+#[cfg(test)]
11+mod unit_tests;
12+
13+const NAME: &str = "b3sum";
814
915 const FILE_ARG: &str = "file";
1016 const DERIVE_KEY_ARG: &str = "derive-key";
@@ -14,70 +20,237 @@ const NO_MMAP_ARG: &str = "no-mmap";
1420 const NO_NAMES_ARG: &str = "no-names";
1521 const NUM_THREADS_ARG: &str = "num-threads";
1622 const RAW_ARG: &str = "raw";
23+const CHECK_ARG: &str = "check";
24+const QUIET_ARG: &str = "quiet";
1725
18-fn clap_parse_argv() -> clap::ArgMatches<'static> {
19- App::new("b3sum")
20- .version(env!("CARGO_PKG_VERSION"))
21- .arg(Arg::with_name(FILE_ARG).multiple(true))
22- .arg(
23- Arg::with_name(LENGTH_ARG)
24- .long(LENGTH_ARG)
25- .short("l")
26- .takes_value(true)
27- .value_name("LEN")
28- .help(
29- "The number of output bytes, prior to hex\n\
30- encoding (default 32)",
31- ),
32- )
33- .arg(
34- Arg::with_name(NUM_THREADS_ARG)
35- .long(NUM_THREADS_ARG)
36- .takes_value(true)
37- .value_name("NUM")
38- .help(
39- "The maximum number of threads to use. By\n\
40- default, this is the number of logical cores.\n\
41- If this flag is omitted, or if its value is 0,\n\
42- RAYON_NUM_THREADS is also respected.",
43- ),
44- )
45- .arg(
46- Arg::with_name(KEYED_ARG)
47- .long(KEYED_ARG)
48- .requires(FILE_ARG)
49- .help(
50- "Uses the keyed mode. The secret key is read from standard\n\
51- input, and it must be exactly 32 raw bytes.",
52- ),
53- )
54- .arg(
55- Arg::with_name(DERIVE_KEY_ARG)
56- .long(DERIVE_KEY_ARG)
57- .conflicts_with(KEYED_ARG)
58- .takes_value(true)
59- .value_name("CONTEXT")
60- .help(
61- "Uses the key derivation mode, with the given\n\
62- context string. Cannot be used with --keyed.",
63- ),
64- )
65- .arg(
66- Arg::with_name(NO_MMAP_ARG)
67- .long(NO_MMAP_ARG)
68- .help("Disables memory mapping"),
69- )
70- .arg(
71- Arg::with_name(NO_NAMES_ARG)
72- .long(NO_NAMES_ARG)
73- .help("Omits filenames in the output"),
74- )
75- .arg(Arg::with_name(RAW_ARG).long(RAW_ARG).help(
76- "Writes raw output bytes to stdout, rather than hex.\n\
77- --no-names is implied. In this case, only a single\n\
78- input is allowed.",
79- ))
80- .get_matches()
26+struct Args {
27+ inner: clap::ArgMatches<'static>,
28+ file_args: Vec<PathBuf>,
29+ base_hasher: blake3::Hasher,
30+}
31+
32+impl Args {
33+ fn parse() -> Result<Self> {
34+ let inner = App::new("b3sum")
35+ .version(env!("CARGO_PKG_VERSION"))
36+ .arg(Arg::with_name(FILE_ARG).multiple(true).help(
37+ "Files to hash, or checkfiles to check. When no file is given,\n\
38+ or when - is given, read standard input.",
39+ ))
40+ .arg(
41+ Arg::with_name(LENGTH_ARG)
42+ .long(LENGTH_ARG)
43+ .short("l")
44+ .takes_value(true)
45+ .value_name("LEN")
46+ .help(
47+ "The number of output bytes, prior to hex\n\
48+ encoding (default 32)",
49+ ),
50+ )
51+ .arg(
52+ Arg::with_name(NUM_THREADS_ARG)
53+ .long(NUM_THREADS_ARG)
54+ .takes_value(true)
55+ .value_name("NUM")
56+ .help(
57+ "The maximum number of threads to use. By\n\
58+ default, this is the number of logical cores.\n\
59+ If this flag is omitted, or if its value is 0,\n\
60+ RAYON_NUM_THREADS is also respected.",
61+ ),
62+ )
63+ .arg(
64+ Arg::with_name(KEYED_ARG)
65+ .long(KEYED_ARG)
66+ .requires(FILE_ARG)
67+ .help(
68+ "Uses the keyed mode. The secret key is read from standard\n\
69+ input, and it must be exactly 32 raw bytes.",
70+ ),
71+ )
72+ .arg(
73+ Arg::with_name(DERIVE_KEY_ARG)
74+ .long(DERIVE_KEY_ARG)
75+ .conflicts_with(KEYED_ARG)
76+ .takes_value(true)
77+ .value_name("CONTEXT")
78+ .help(
79+ "Uses the key derivation mode, with the given\n\
80+ context string. Cannot be used with --keyed.",
81+ ),
82+ )
83+ .arg(Arg::with_name(NO_MMAP_ARG).long(NO_MMAP_ARG).help(
84+ "Disables memory mapping. Currently this also disables\n\
85+ multithreading.",
86+ ))
87+ .arg(
88+ Arg::with_name(NO_NAMES_ARG)
89+ .long(NO_NAMES_ARG)
90+ .help("Omits filenames in the output"),
91+ )
92+ .arg(Arg::with_name(RAW_ARG).long(RAW_ARG).help(
93+ "Writes raw output bytes to stdout, rather than hex.\n\
94+ --no-names is implied. In this case, only a single\n\
95+ input is allowed.",
96+ ))
97+ .arg(
98+ Arg::with_name(CHECK_ARG)
99+ .long(CHECK_ARG)
100+ .short("c")
101+ .conflicts_with(DERIVE_KEY_ARG)
102+ .conflicts_with(KEYED_ARG)
103+ .conflicts_with(LENGTH_ARG)
104+ .conflicts_with(RAW_ARG)
105+ .conflicts_with(NO_NAMES_ARG)
106+ .help("Reads BLAKE3 sums from the [file]s and checks them"),
107+ )
108+ .arg(
109+ Arg::with_name(QUIET_ARG)
110+ .long(QUIET_ARG)
111+ .requires(CHECK_ARG)
112+ .help(
113+ "Skips printing OK for each successfully verified file.\n\
114+ Must be used with --check.",
115+ ),
116+ )
117+ // wild::args_os() is equivalent to std::env::args_os() on Unix,
118+ // but on Windows it adds support for globbing.
119+ .get_matches_from(wild::args_os());
120+ let file_args = if let Some(iter) = inner.values_of_os(FILE_ARG) {
121+ iter.map(|s| s.into()).collect()
122+ } else {
123+ vec!["-".into()]
124+ };
125+ if inner.is_present(RAW_ARG) && file_args.len() > 1 {
126+ bail!("Only one filename can be provided when using --raw");
127+ }
128+ let base_hasher = if inner.is_present(KEYED_ARG) {
129+ // In keyed mode, since stdin is used for the key, we can't handle
130+ // `-` arguments. Input::open handles that case below.
131+ blake3::Hasher::new_keyed(&read_key_from_stdin()?)
132+ } else if let Some(context) = inner.value_of(DERIVE_KEY_ARG) {
133+ blake3::Hasher::new_derive_key(context)
134+ } else {
135+ blake3::Hasher::new()
136+ };
137+ Ok(Self {
138+ inner,
139+ file_args,
140+ base_hasher,
141+ })
142+ }
143+
144+ fn num_threads(&self) -> Result<Option<usize>> {
145+ if let Some(num_threads_str) = self.inner.value_of(NUM_THREADS_ARG) {
146+ Ok(Some(
147+ num_threads_str
148+ .parse()
149+ .context("Failed to parse num threads.")?,
150+ ))
151+ } else {
152+ Ok(None)
153+ }
154+ }
155+
156+ fn check(&self) -> bool {
157+ self.inner.is_present(CHECK_ARG)
158+ }
159+
160+ fn raw(&self) -> bool {
161+ self.inner.is_present(RAW_ARG)
162+ }
163+
164+ fn no_mmap(&self) -> bool {
165+ self.inner.is_present(NO_MMAP_ARG)
166+ }
167+
168+ fn no_names(&self) -> bool {
169+ self.inner.is_present(NO_NAMES_ARG)
170+ }
171+
172+ fn len(&self) -> Result<u64> {
173+ if let Some(length) = self.inner.value_of(LENGTH_ARG) {
174+ length.parse::<u64>().context("Failed to parse length.")
175+ } else {
176+ Ok(blake3::OUT_LEN as u64)
177+ }
178+ }
179+
180+ fn keyed(&self) -> bool {
181+ self.inner.is_present(KEYED_ARG)
182+ }
183+
184+ fn quiet(&self) -> bool {
185+ self.inner.is_present(QUIET_ARG)
186+ }
187+}
188+
189+enum Input {
190+ Mmap(io::Cursor<memmap::Mmap>),
191+ File(File),
192+ Stdin,
193+}
194+
195+impl Input {
196+ // Open an input file, using mmap if appropriate. "-" means stdin. Note
197+ // that this convention applies both to command line arguments, and to
198+ // filepaths that appear in a checkfile.
199+ fn open(path: &Path, args: &Args) -> Result<Self> {
200+ if path == Path::new("-") {
201+ if args.keyed() {
202+ bail!("Cannot open `-` in keyed mode");
203+ }
204+ return Ok(Self::Stdin);
205+ }
206+ let file = File::open(path)?;
207+ if !args.no_mmap() {
208+ if let Some(mmap) = maybe_memmap_file(&file)? {
209+ return Ok(Self::Mmap(io::Cursor::new(mmap)));
210+ }
211+ }
212+ Ok(Self::File(file))
213+ }
214+
215+ fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> {
216+ let mut hasher = args.base_hasher.clone();
217+ match self {
218+ // The fast path: If we mmapped the file successfully, hash using
219+ // multiple threads. This doesn't work on stdin, or on some files,
220+ // and it can also be disabled with --no-mmap.
221+ Self::Mmap(cursor) => {
222+ hasher.update_with_join::<blake3::join::RayonJoin>(cursor.get_ref());
223+ }
224+ // The slower paths, for stdin or files we didn't/couldn't mmap.
225+ // This is currently all single-threaded. Doing multi-threaded
226+ // hashing without memory mapping is tricky, since all your worker
227+ // threads have to stop every time you refill the buffer, and that
228+ // ends up being a lot of overhead. To solve that, we need a more
229+ // complicated double-buffering strategy where a background thread
230+ // fills one buffer while the worker threads are hashing the other
231+ // one. We might implement that in the future, but since this is
232+ // the slow path anyway, it's not high priority.
233+ Self::File(file) => {
234+ copy_wide(file, &mut hasher)?;
235+ }
236+ Self::Stdin => {
237+ let stdin = io::stdin();
238+ let lock = stdin.lock();
239+ copy_wide(lock, &mut hasher)?;
240+ }
241+ }
242+ Ok(hasher.finalize_xof())
243+ }
244+}
245+
246+impl Read for Input {
247+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
248+ match self {
249+ Self::Mmap(cursor) => cursor.read(buf),
250+ Self::File(file) => file.read(buf),
251+ Self::Stdin => io::stdin().read(buf),
252+ }
253+ }
81254 }
82255
83256 // A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
@@ -100,20 +273,9 @@ fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u
100273 }
101274 }
102275
103-// The slow path, for inputs that we can't memmap.
104-fn hash_reader(base_hasher: &blake3::Hasher, reader: impl Read) -> Result<blake3::OutputReader> {
105- let mut hasher = base_hasher.clone();
106- // This is currently all single-threaded. Doing multi-threaded hashing
107- // without memory mapping is tricky, since all your worker threads have to
108- // stop every time you refill the buffer, and that ends up being a lot of
109- // overhead. To solve that, we need a more complicated double-buffering
110- // strategy where a background thread fills one buffer while the worker
111- // threads are hashing the other one. We might implement that in the
112- // future, but since this is the slow path anyway, it's not high priority.
113- copy_wide(reader, &mut hasher)?;
114- Ok(hasher.finalize_xof())
115-}
116-
276+// Mmap a file, if it looks like a good idea. Return None in cases where we
277+// know mmap will fail, or if the file is short enough that mmapping isn't
278+// worth it. However, if we do try to mmap and it fails, return the error.
117279 fn maybe_memmap_file(file: &File) -> Result<Option<memmap::Mmap>> {
118280 let metadata = file.metadata()?;
119281 let file_size = metadata.len();
@@ -143,27 +305,9 @@ fn maybe_memmap_file(file: &File) -> Result<Option<memmap::Mmap>> {
143305 })
144306 }
145307
146-// The fast path: Try to hash a file by mem-mapping it first. This is faster if
147-// it works, but it's not always possible.
148-fn maybe_hash_memmap(
149- _base_hasher: &blake3::Hasher,
150- _file: &File,
151-) -> Result<Option<blake3::OutputReader>> {
152- if let Some(map) = maybe_memmap_file(_file)? {
153- // Memory mapping worked. Use Rayon-based multi-threading to split
154- // up the whole file across many worker threads.
155- return Ok(Some(
156- _base_hasher
157- .clone()
158- .update_with_join::<blake3::join::RayonJoin>(&map)
159- .finalize_xof(),
160- ));
161- }
162- Ok(None)
163-}
164-
165-fn write_hex_output(mut output: blake3::OutputReader, mut len: u64) -> Result<()> {
308+fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
166309 // Encoding multiples of the block size is most efficient.
310+ let mut len = args.len()?;
167311 let mut block = [0; blake3::BLOCK_LEN];
168312 while len > 0 {
169313 output.fill(&mut block);
@@ -175,8 +319,8 @@ fn write_hex_output(mut output: blake3::OutputReader, mut len: u64) -> Result<()
175319 Ok(())
176320 }
177321
178-fn write_raw_output(output: blake3::OutputReader, len: u64) -> Result<()> {
179- let mut output = output.take(len);
322+fn write_raw_output(output: blake3::OutputReader, args: &Args) -> Result<()> {
323+ let mut output = output.take(args.len()?);
180324 let stdout = std::io::stdout();
181325 let mut handler = stdout.lock();
182326 std::io::copy(&mut output, &mut handler)?;
@@ -184,22 +328,6 @@ fn write_raw_output(output: blake3::OutputReader, len: u64) -> Result<()> {
184328 Ok(())
185329 }
186330
187-// Errors from this function get handled by the file loop and printed per-file.
188-fn hash_file(
189- base_hasher: &blake3::Hasher,
190- filepath: &std::ffi::OsStr,
191- mmap_disabled: bool,
192-) -> Result<blake3::OutputReader> {
193- let file = File::open(filepath)?;
194- if !mmap_disabled {
195- if let Some(output) = maybe_hash_memmap(&base_hasher, &file)? {
196- return Ok(output); // the fast path
197- }
198- }
199- // the slow path
200- hash_reader(&base_hasher, file)
201-}
202-
203331 fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> {
204332 let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1);
205333 let n = std::io::stdin()
@@ -219,70 +347,275 @@ fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> {
219347 }
220348 }
221349
222-fn main() -> Result<()> {
223- let args = clap_parse_argv();
224- let len = if let Some(length) = args.value_of(LENGTH_ARG) {
225- length.parse::<u64>().context("Failed to parse length.")?
350+struct FilepathString {
351+ filepath_string: String,
352+ is_escaped: bool,
353+}
354+
356+// Returns the printable form of the path together with a flag saying whether it had to be escaped.
356+fn filepath_to_string(filepath: &Path) -> FilepathString {
357+ let unicode_cow = filepath.to_string_lossy();
358+ let mut filepath_string = unicode_cow.to_string();
359+ // If we're on Windows, normalize backslashes to forward slashes. This
360+ // avoids a lot of ugly escaping in the common case, and it makes
361+ // checkfiles created on Windows more likely to be portable to Unix. It
362+ // also allows us to set a blanket "no backslashes allowed in checkfiles on
363+ // Windows" rule, rather than allowing a Unix backslash to potentially get
364+ // interpreted as a directory separator on Windows.
365+ if cfg!(windows) {
366+ filepath_string = filepath_string.replace('\\', "/");
367+ }
368+ let mut is_escaped = false;
369+ if filepath_string.contains('\\') || filepath_string.contains('\n') {
370+ filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n");
371+ is_escaped = true;
372+ }
373+ FilepathString {
374+ filepath_string,
375+ is_escaped,
376+ }
377+}
378+
379+fn hex_half_byte(c: char) -> Result<u8> {
380+ // The hex characters in the hash must be lowercase for now, though we
381+ // could support uppercase too if we wanted to.
382+ if '0' <= c && c <= '9' {
383+ return Ok(c as u8 - '0' as u8);
384+ }
385+ if 'a' <= c && c <= 'f' {
386+ return Ok(c as u8 - 'a' as u8 + 10);
387+ }
388+ bail!("Invalid hex");
389+}
390+
391+// The `check` command is a security tool. That means it's much better for a
392+// check to fail more often than it should (a false negative), than for a check
393+// to ever succeed when it shouldn't (a false positive). By forbidding certain
394+// characters in checked filepaths, we avoid a class of false positives where
395+// two different filepaths can get confused with each other.
396+fn check_for_invalid_characters(utf8_path: &str) -> Result<()> {
397+ // Null characters in paths should never happen, but they can result in a
398+ // path getting silently truncated on Unix.
399+ if utf8_path.contains('\0') {
400+ bail!("Null character in path");
401+ }
402+ // Because we convert invalid UTF-8 sequences in paths to the Unicode
403+ // replacement character, multiple different invalid paths can map to the
404+ // same UTF-8 string.
405+ if utf8_path.contains('�') {
406+ bail!("Unicode replacement character in path");
407+ }
408+ // We normalize all Windows backslashes to forward slashes in our output,
409+ // so the only natural way to get a backslash in a checkfile on Windows is
410+ // to construct it on Unix and copy it over. (Or of course you could just
411+ // doctor it by hand.) To avoid confusing this with a directory separator,
412+ // we forbid backslashes entirely on Windows. Note that this check comes
413+ // after unescaping has been done.
414+ if cfg!(windows) && utf8_path.contains('\\') {
415+ bail!("Backslash in path");
416+ }
417+ Ok(())
418+}
419+
420+fn unescape(mut path: &str) -> Result<String> {
421+ let mut unescaped = String::with_capacity(2 * path.len());
422+ while let Some(i) = path.find('\\') {
423+ ensure!(i < path.len() - 1, "Invalid backslash escape");
424+ unescaped.push_str(&path[..i]);
425+ match path[i + 1..].chars().next().unwrap() {
426+ // Anything other than a recognized escape sequence is an error.
427+ 'n' => unescaped.push_str("\n"),
428+ '\\' => unescaped.push_str("\\"),
429+ _ => bail!("Invalid backslash escape"),
430+ }
431+ path = &path[i + 2..];
432+ }
433+ unescaped.push_str(path);
434+ Ok(unescaped)
435+}
436+
437+#[derive(Debug)]
438+struct ParsedCheckLine {
439+ file_string: String,
440+ is_escaped: bool,
441+ file_path: PathBuf,
442+ expected_hash: blake3::Hash,
443+}
444+
445+fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> {
446+ // Trim off the trailing newline, if any.
447+ line = line.trim_end_matches('\n');
448+ // If there's a backslash at the front of the line, that means we need to
449+ // unescape the path below. This matches the behavior of e.g. md5sum.
450+ let first = if let Some(c) = line.chars().next() {
451+ c
226452 } else {
227- blake3::OUT_LEN as u64
453+ bail!("Empty line");
228454 };
229- let base_hasher = if args.is_present(KEYED_ARG) {
230- blake3::Hasher::new_keyed(&read_key_from_stdin()?)
231- } else if let Some(context) = args.value_of(DERIVE_KEY_ARG) {
232- blake3::Hasher::new_derive_key(context)
455+ let mut is_escaped = false;
456+ if first == '\\' {
457+ is_escaped = true;
458+ line = &line[1..];
459+ }
460+ // The front of the line must be a hash of the usual length, followed by
461+ // two spaces. The hex characters in the hash must be lowercase for now,
462+ // though we could support uppercase too if we wanted to.
463+ let hash_hex_len = 2 * blake3::OUT_LEN;
464+ let num_spaces = 2;
465+ let prefix_len = hash_hex_len + num_spaces;
466+ ensure!(line.len() > prefix_len, "Short line");
467+ ensure!(
468+ line.chars().take(prefix_len).all(|c| c.is_ascii()),
469+ "Non-ASCII prefix"
470+ );
471+ ensure!(&line[hash_hex_len..][..2] == "  ", "Invalid space");
472+ // Decode the hash hex.
473+ let mut hash_bytes = [0; blake3::OUT_LEN];
474+ let mut hex_chars = line[..hash_hex_len].chars();
475+ for byte in &mut hash_bytes {
476+ let high_char = hex_chars.next().unwrap();
477+ let low_char = hex_chars.next().unwrap();
478+ *byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?;
479+ }
480+ let expected_hash: blake3::Hash = hash_bytes.into();
481+ let file_string = line[prefix_len..].to_string();
482+ let file_path_string = if is_escaped {
483+ // If we detected a backslash at the start of the line earlier, now we
484+ // need to unescape backslashes and newlines.
485+ unescape(&file_string)?
233486 } else {
234- blake3::Hasher::new()
487+ file_string.clone().into()
235488 };
236- let mmap_disabled = args.is_present(NO_MMAP_ARG);
237- let print_names = !args.is_present(NO_NAMES_ARG);
238- let raw_output = args.is_present(RAW_ARG);
489+ check_for_invalid_characters(&file_path_string)?;
490+ Ok(ParsedCheckLine {
491+ file_string,
492+ is_escaped,
493+ file_path: file_path_string.into(),
494+ expected_hash,
495+ })
496+}
497+
498+fn hash_one_input(path: &Path, args: &Args) -> Result<()> {
499+ let mut input = Input::open(path, args)?;
500+ let output = input.hash(args)?;
501+ if args.raw() {
502+ write_raw_output(output, args)?;
503+ return Ok(());
504+ }
505+ if args.no_names() {
506+ write_hex_output(output, args)?;
507+ println!();
508+ return Ok(());
509+ }
510+ let FilepathString {
511+ filepath_string,
512+ is_escaped,
513+ } = filepath_to_string(path);
514+ if is_escaped {
515+ print!("\\");
516+ }
517+ write_hex_output(output, args)?;
518+ println!("  {}", filepath_string);
519+ Ok(())
520+}
521+
522+// Returns true for success. Having a boolean return value here, instead of
523+// passing down the some_file_failed reference, makes it less likely that we
524+// might forget to set it in some error condition.
525+fn check_one_line(line: &str, args: &Args) -> bool {
526+ let parse_result = parse_check_line(&line);
527+ let ParsedCheckLine {
528+ file_string,
529+ is_escaped,
530+ file_path,
531+ expected_hash,
532+ } = match parse_result {
533+ Ok(parsed) => parsed,
534+ Err(e) => {
535+ eprintln!("{}: {}", NAME, e);
536+ return false;
537+ }
538+ };
539+ let file_string = if is_escaped {
540+ "\\".to_string() + &file_string
541+ } else {
542+ file_string
543+ };
544+ let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
545+ .and_then(|mut input| input.hash(args))
546+ .map(|mut hash_output| {
547+ let mut found_hash_bytes = [0; blake3::OUT_LEN];
548+ hash_output.fill(&mut found_hash_bytes);
549+ found_hash_bytes.into()
550+ });
551+ let found_hash: blake3::Hash = match hash_result {
552+ Ok(hash) => hash,
553+ Err(e) => {
554+ println!("{}: FAILED ({})", file_string, e);
555+ return false;
556+ }
557+ };
558+ // This is a constant-time comparison.
559+ if expected_hash == found_hash {
560+ if !args.quiet() {
561+ println!("{}: OK", file_string);
562+ }
563+ true
564+ } else {
565+ println!("{}: FAILED", file_string);
566+ false
567+ }
568+}
569+
570+fn check_one_checkfile(path: &Path, args: &Args, some_file_failed: &mut bool) -> Result<()> {
571+ let checkfile_input = Input::open(path, args)?;
572+ let mut bufreader = io::BufReader::new(checkfile_input);
573+ let mut line = String::new();
574+ loop {
575+ line.clear();
576+ let n = bufreader.read_line(&mut line)?;
577+ if n == 0 {
578+ return Ok(());
579+ }
580+ // check_one_line() prints errors and turns them into a success=false
581+ // return, so it doesn't return a Result.
582+ let success = check_one_line(&line, args);
583+ if !success {
584+ *some_file_failed = true;
585+ }
586+ }
587+}
588+
589+fn main() -> Result<()> {
590+ let args = Args::parse()?;
239591 let mut thread_pool_builder = rayon::ThreadPoolBuilder::new();
240- if let Some(num_threads_str) = args.value_of(NUM_THREADS_ARG) {
241- let num_threads: usize = num_threads_str
242- .parse()
243- .context("Failed to parse num threads.")?;
592+ if let Some(num_threads) = args.num_threads()? {
244593 thread_pool_builder = thread_pool_builder.num_threads(num_threads);
245594 }
246-
247595 let thread_pool = thread_pool_builder.build()?;
248596 thread_pool.install(|| {
249- let mut did_error = false;
250- if let Some(files) = args.values_of_os(FILE_ARG) {
251- if raw_output && files.len() > 1 {
252- bail!("b3sum: Only one filename can be provided when using --raw");
253- }
254- for filepath in files {
255- let filepath_str = filepath.to_string_lossy();
256- match hash_file(&base_hasher, filepath, mmap_disabled) {
257- Ok(output) => {
258- if raw_output {
259- write_raw_output(output, len)?;
260- } else {
261- write_hex_output(output, len)?;
262- if print_names {
263- println!(" {}", filepath_str);
264- } else {
265- println!();
266- }
267- }
268- }
269- Err(e) => {
270- did_error = true;
271- eprintln!("b3sum: {}: {}", filepath_str, e);
272- }
273- }
274- }
275- } else {
276- let stdin = std::io::stdin();
277- let stdin = stdin.lock();
278- let output = hash_reader(&base_hasher, stdin)?;
279- if raw_output {
280- write_raw_output(output, len)?;
597+ let mut some_file_failed = false;
598+ // Note that file_args automatically includes `-` if nothing is given.
599+ for path in &args.file_args {
600+ if args.check() {
601+ // A hash mismatch or a failure to read a hashed file will be
602+ // printed in the checkfile loop, and will not propagate here.
603+ // This is similar to the explicit error handling we do in the
604+ // hashing case immediately below. In these cases,
605+ // some_file_failed will be set to true.
606+ check_one_checkfile(path, &args, &mut some_file_failed)?;
281607 } else {
282- write_hex_output(output, len)?;
283- println!();
608+ // Errors encountered in hashing are tolerated and printed to
609+ // stderr. This allows e.g. `b3sum *` to print errors for
610+ // non-files and keep going. However, if we encounter any
611+ // errors we'll still return non-zero at the end.
612+ let result = hash_one_input(path, &args);
613+ if let Err(e) = result {
614+ some_file_failed = true;
615+ eprintln!("{}: {}", NAME, e);
616+ }
284617 }
285618 }
286- std::process::exit(if did_error { 1 } else { 0 });
619+ std::process::exit(if some_file_failed { 1 } else { 0 });
287620 })
288621 }
--- /dev/null
+++ b/b3sum/src/unit_tests.rs
@@ -0,0 +1,189 @@
1+use std::path::Path;
2+
3+#[test]
4+fn test_parse_check_line() {
5+ // =========================
6+ // ===== Success Cases =====
7+ // =========================
8+
9+ // the basic case
10+ let crate::ParsedCheckLine {
11+ file_string,
12+ is_escaped,
13+ file_path,
14+ expected_hash,
15+ } = crate::parse_check_line(
16+ "0909090909090909090909090909090909090909090909090909090909090909 foo",
17+ )
18+ .unwrap();
19+ assert_eq!(expected_hash, blake3::Hash::from([0x09; 32]));
20+ assert!(!is_escaped);
21+ assert_eq!(file_string, "foo");
22+ assert_eq!(file_path, Path::new("foo"));
23+
24+ // regular whitespace
25+ let crate::ParsedCheckLine {
26+ file_string,
27+ is_escaped,
28+ file_path,
29+ expected_hash,
30+ } = crate::parse_check_line(
31+ "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa fo \to\n\n\n",
32+ )
33+ .unwrap();
34+ assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32]));
35+ assert!(!is_escaped);
36+ assert_eq!(file_string, "fo \to");
37+ assert_eq!(file_path, Path::new("fo \to"));
38+
39+ // path is one space
40+ let crate::ParsedCheckLine {
41+ file_string,
42+ is_escaped,
43+ file_path,
44+ expected_hash,
45+ } = crate::parse_check_line(
46+ "4242424242424242424242424242424242424242424242424242424242424242 ",
47+ )
48+ .unwrap();
49+ assert_eq!(expected_hash, blake3::Hash::from([0x42; 32]));
50+ assert!(!is_escaped);
51+ assert_eq!(file_string, " ");
52+ assert_eq!(file_path, Path::new(" "));
53+
54+ // *Unescaped* backslashes. Note that this line does *not* start with a
55+ // backslash, so something like "\" + "n" is interpreted as *two*
56+ // characters. We forbid all backslashes on Windows, so this test is
57+ // Unix-only.
58+ if cfg!(not(windows)) {
59+ let crate::ParsedCheckLine {
60+ file_string,
61+ is_escaped,
62+ file_path,
63+ expected_hash,
64+ } = crate::parse_check_line(
65+ "4343434343434343434343434343434343434343434343434343434343434343 fo\\a\\no",
66+ )
67+ .unwrap();
68+ assert_eq!(expected_hash, blake3::Hash::from([0x43; 32]));
69+ assert!(!is_escaped);
70+ assert_eq!(file_string, "fo\\a\\no");
71+ assert_eq!(file_path, Path::new("fo\\a\\no"));
72+ }
73+
74+ // escaped newline
75+ let crate::ParsedCheckLine {
76+ file_string,
77+ is_escaped,
78+ file_path,
79+ expected_hash,
80+ } = crate::parse_check_line(
81+ "\\4444444444444444444444444444444444444444444444444444444444444444 fo\\n\\no",
82+ )
83+ .unwrap();
84+ assert_eq!(expected_hash, blake3::Hash::from([0x44; 32]));
85+ assert!(is_escaped);
86+ assert_eq!(file_string, "fo\\n\\no");
87+ assert_eq!(file_path, Path::new("fo\n\no"));
88+
89+ // Escaped newline and backslash. Again because backslash is not allowed on
90+ // Windows, this test is Unix-only.
91+ if cfg!(not(windows)) {
92+ let crate::ParsedCheckLine {
93+ file_string,
94+ is_escaped,
95+ file_path,
96+ expected_hash,
97+ } = crate::parse_check_line(
98+ "\\4545454545454545454545454545454545454545454545454545454545454545 fo\\n\\\\o",
99+ )
100+ .unwrap();
101+ assert_eq!(expected_hash, blake3::Hash::from([0x45; 32]));
102+ assert!(is_escaped);
103+ assert_eq!(file_string, "fo\\n\\\\o");
104+ assert_eq!(file_path, Path::new("fo\n\\o"));
105+ }
106+
107+ // non-ASCII path
108+ let crate::ParsedCheckLine {
109+ file_string,
110+ is_escaped,
111+ file_path,
112+ expected_hash,
113+ } = crate::parse_check_line(
114+ "4646464646464646464646464646464646464646464646464646464646464646 否认",
115+ )
116+ .unwrap();
117+ assert_eq!(expected_hash, blake3::Hash::from([0x46; 32]));
118+ assert!(!is_escaped);
119+ assert_eq!(file_string, "否认");
120+ assert_eq!(file_path, Path::new("否认"));
121+
122+ // =========================
123+ // ===== Failure Cases =====
124+ // =========================
125+
126+ // too short
127+ crate::parse_check_line("").unwrap_err();
128+ crate::parse_check_line("0").unwrap_err();
129+ crate::parse_check_line("00").unwrap_err();
130+ crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000")
131+ .unwrap_err();
132+ crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 ")
133+ .unwrap_err();
134+
135+ // not enough spaces
136+ crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo")
137+ .unwrap_err();
138+
139+ // capital letter hex
140+ crate::parse_check_line(
141+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA foo",
142+ )
143+ .unwrap_err();
144+
145+ // non-hex hex
146+ crate::parse_check_line(
147+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx foo",
148+ )
149+ .unwrap_err();
150+
151+ // non-ASCII hex
152+ crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉? foo").unwrap_err();
153+
154+ // invalid escape sequence
155+ crate::parse_check_line(
156+ "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\o",
157+ )
158+ .unwrap_err();
159+
160+ // truncated escape sequence
161+ crate::parse_check_line(
162+ "\\0000000000000000000000000000000000000000000000000000000000000000 foo\\",
163+ )
164+ .unwrap_err();
165+
166+ // null char
167+ crate::parse_check_line(
168+ "0000000000000000000000000000000000000000000000000000000000000000 fo\0o",
169+ )
170+ .unwrap_err();
171+
172+ // Unicode replacement char
173+ crate::parse_check_line(
174+ "0000000000000000000000000000000000000000000000000000000000000000 fo�o",
175+ )
176+ .unwrap_err();
177+
178+ // On Windows only, backslashes are not allowed, escaped or otherwise.
179+ if cfg!(windows) {
180+ crate::parse_check_line(
181+ "0000000000000000000000000000000000000000000000000000000000000000 fo\\o",
182+ )
183+ .unwrap_err();
184+ crate::parse_check_line(
185+ "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\\\o",
186+ )
187+ .unwrap_err();
188+ }
189+}
--- /dev/null
+++ b/b3sum/tests/cli_tests.rs
@@ -0,0 +1,512 @@
1+use duct::cmd;
2+use std::ffi::OsString;
3+use std::fs;
4+use std::io::prelude::*;
5+use std::path::PathBuf;
6+
7+pub fn b3sum_exe() -> PathBuf {
8+ env!("CARGO_BIN_EXE_b3sum").into()
9+}
10+
11+#[test]
12+fn test_hash_one() {
13+ let expected = format!("{} -", blake3::hash(b"foo").to_hex());
14+ let output = cmd!(b3sum_exe()).stdin_bytes("foo").read().unwrap();
15+ assert_eq!(&*expected, output);
16+}
17+
18+#[test]
19+fn test_hash_one_raw() {
20+ let expected = blake3::hash(b"foo").as_bytes().to_owned();
21+ let output = cmd!(b3sum_exe(), "--raw")
22+ .stdin_bytes("foo")
23+ .stdout_capture()
24+ .run()
25+ .unwrap()
26+ .stdout;
27+ assert_eq!(expected, output.as_slice());
28+}
29+
30+#[test]
31+fn test_hash_many() {
32+ let dir = tempfile::tempdir().unwrap();
33+ let file1 = dir.path().join("file1");
34+ fs::write(&file1, b"foo").unwrap();
35+ let file2 = dir.path().join("file2");
36+ fs::write(&file2, b"bar").unwrap();
37+
38+ let output = cmd!(b3sum_exe(), &file1, &file2).read().unwrap();
39+ let foo_hash = blake3::hash(b"foo");
40+ let bar_hash = blake3::hash(b"bar");
41+ let expected = format!(
42+ "{} {}\n{} {}",
43+ foo_hash.to_hex(),
44+ // account for slash normalization on Windows
45+ file1.to_string_lossy().replace("\\", "/"),
46+ bar_hash.to_hex(),
47+ file2.to_string_lossy().replace("\\", "/"),
48+ );
49+ assert_eq!(expected, output);
50+
51+ let output_no_names = cmd!(b3sum_exe(), "--no-names", &file1, &file2)
52+ .read()
53+ .unwrap();
54+ let expected_no_names = format!("{}\n{}", foo_hash.to_hex(), bar_hash.to_hex(),);
55+ assert_eq!(expected_no_names, output_no_names);
56+}
57+
58+#[test]
59+fn test_hash_length() {
60+ let mut buf = [0; 100];
61+ blake3::Hasher::new()
62+ .update(b"foo")
63+ .finalize_xof()
64+ .fill(&mut buf);
65+ let expected = format!("{} -", hex::encode(&buf[..]));
66+ let output = cmd!(b3sum_exe(), "--length=100")
67+ .stdin_bytes("foo")
68+ .read()
69+ .unwrap();
70+ assert_eq!(&*expected, &*output);
71+}
72+
73+#[test]
74+fn test_keyed() {
75+ let key = [42; blake3::KEY_LEN];
76+ let f = tempfile::NamedTempFile::new().unwrap();
77+ f.as_file().write_all(b"foo").unwrap();
78+ f.as_file().flush().unwrap();
79+ let expected = blake3::keyed_hash(&key, b"foo").to_hex();
80+ let output = cmd!(b3sum_exe(), "--keyed", "--no-names", f.path())
81+ .stdin_bytes(&key[..])
82+ .read()
83+ .unwrap();
84+ assert_eq!(&*expected, &*output);
85+}
86+
87+#[test]
88+fn test_derive_key() {
89+ let context = "BLAKE3 2019-12-28 10:28:41 example context";
90+ let f = tempfile::NamedTempFile::new().unwrap();
91+ f.as_file().write_all(b"key material").unwrap();
92+ f.as_file().flush().unwrap();
93+ let mut derive_key_out = [0; blake3::OUT_LEN];
94+ blake3::derive_key(context, b"key material", &mut derive_key_out);
95+ let expected = hex::encode(&derive_key_out);
96+ let output = cmd!(b3sum_exe(), "--derive-key", context, "--no-names", f.path())
97+ .read()
98+ .unwrap();
99+ assert_eq!(&*expected, &*output);
100+}
101+
102+#[test]
103+fn test_no_mmap() {
104+ let f = tempfile::NamedTempFile::new().unwrap();
105+ f.as_file().write_all(b"foo").unwrap();
106+ f.as_file().flush().unwrap();
107+
108+ let expected = blake3::hash(b"foo").to_hex();
109+ let output = cmd!(b3sum_exe(), "--no-mmap", "--no-names", f.path())
110+ .read()
111+ .unwrap();
112+ assert_eq!(&*expected, &*output);
113+}
114+
115+#[test]
116+fn test_length_without_value_is_an_error() {
117+ let result = cmd!(b3sum_exe(), "--length")
118+ .stdin_bytes("foo")
119+ .stderr_capture()
120+ .run();
121+ assert!(result.is_err());
122+}
123+
124+#[test]
125+fn test_raw_with_multi_files_is_an_error() {
126+ let f1 = tempfile::NamedTempFile::new().unwrap();
127+ let f2 = tempfile::NamedTempFile::new().unwrap();
128+
129+ // Make sure it doesn't error with just one file
130+ let result = cmd!(b3sum_exe(), "--raw", f1.path()).stdout_capture().run();
131+ assert!(result.is_ok());
132+
133+ // Make sure it errors when both files are passed
134+ let result = cmd!(b3sum_exe(), "--raw", f1.path(), f2.path())
135+ .stderr_capture()
136+ .run();
137+ assert!(result.is_err());
138+}
139+
140+#[test]
141+#[cfg(unix)]
142+fn test_newline_and_backslash_escaping_on_unix() {
143+ let empty_hash = blake3::hash(b"").to_hex();
144+ let dir = tempfile::tempdir().unwrap();
145+ fs::create_dir(dir.path().join("subdir")).unwrap();
146+ let names = [
147+ "abcdef",
148+ "abc\ndef",
149+ "abc\\def",
150+ "abc\rdef",
151+ "abc\r\ndef",
152+ "subdir/foo",
153+ ];
154+ let mut paths = Vec::new();
155+ for name in &names {
156+ let path = dir.path().join(name);
157+ println!("creating file at {:?}", path);
158+ fs::write(&path, b"").unwrap();
159+ paths.push(path);
160+ }
161+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
162+ let expected = format!(
163+ "\
164+{0} abcdef
165+\\{0} abc\\ndef
166+\\{0} abc\\\\def
167+{0} abc\rdef
168+\\{0} abc\r\\ndef
169+{0} subdir/foo",
170+ empty_hash,
171+ );
172+ println!("output");
173+ println!("======");
174+ println!("{}", output);
175+ println!();
176+ println!("expected");
177+ println!("========");
178+ println!("{}", expected);
179+ println!();
180+ assert_eq!(expected, output);
181+}
182+
183+#[test]
184+#[cfg(windows)]
185+fn test_slash_normalization_on_windows() {
186+ let empty_hash = blake3::hash(b"").to_hex();
187+ let dir = tempfile::tempdir().unwrap();
188+ fs::create_dir(dir.path().join("subdir")).unwrap();
189+ // Note that filenames can't contain newlines or backslashes on Windows, so
190+ // we don't test escaping here. We only test forward slash and backslash as
191+ // directory separators.
192+ let names = ["abcdef", "subdir/foo", "subdir\\bar"];
193+ let mut paths = Vec::new();
194+ for name in &names {
195+ let path = dir.path().join(name);
196+ println!("creating file at {:?}", path);
197+ fs::write(&path, b"").unwrap();
198+ paths.push(path);
199+ }
200+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
201+ let expected = format!(
202+ "\
203+{0} abcdef
204+{0} subdir/foo
205+{0} subdir/bar",
206+ empty_hash,
207+ );
208+ println!("output");
209+ println!("======");
210+ println!("{}", output);
211+ println!();
212+ println!("expected");
213+ println!("========");
214+ println!("{}", expected);
215+ println!();
216+ assert_eq!(expected, output);
217+}
218+
219+#[test]
220+#[cfg(unix)]
221+fn test_invalid_unicode_on_unix() {
222+ use std::os::unix::ffi::OsStringExt;
223+
224+ let empty_hash = blake3::hash(b"").to_hex();
225+ let dir = tempfile::tempdir().unwrap();
226+ let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())];
227+ let mut paths = Vec::new();
228+ for name in &names {
229+ let path = dir.path().join(name);
230+ println!("creating file at {:?}", path);
231+ // Note: Some operating systems, macOS in particular, simply don't
232+ // allow invalid Unicode in filenames. On those systems, this write
233+ // will fail. That's fine, we'll just short-circuit this test in that
234+ // case. But assert that at least Linux allows this.
235+ let write_result = fs::write(&path, b"");
236+ if cfg!(target_os = "linux") {
237+ write_result.expect("Linux should allow invalid Unicode");
238+ } else if write_result.is_err() {
239+ return;
240+ }
241+ paths.push(path);
242+ }
243+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
244+ let expected = format!(
245+ "\
246+{0} abcdef
247+{0} abc�def",
248+ empty_hash,
249+ );
250+ println!("output");
251+ println!("======");
252+ println!("{}", output);
253+ println!();
254+ println!("expected");
255+ println!("========");
256+ println!("{}", expected);
257+ println!();
258+ assert_eq!(expected, output);
259+}
260+
261+#[test]
262+#[cfg(windows)]
263+fn test_invalid_unicode_on_windows() {
264+ use std::os::windows::ffi::OsStringExt;
265+
266+ let empty_hash = blake3::hash(b"").to_hex();
267+ let dir = tempfile::tempdir().unwrap();
268+ let surrogate_char = 0xDC00;
269+ let bad_unicode_wchars = [
270+ 'a' as u16,
271+ 'b' as u16,
272+ 'c' as u16,
273+ surrogate_char,
274+ 'd' as u16,
275+ 'e' as u16,
276+ 'f' as u16,
277+ ];
278+ let bad_osstring = OsString::from_wide(&bad_unicode_wchars);
279+ let names = ["abcdef".into(), bad_osstring];
280+ let mut paths = Vec::new();
281+ for name in &names {
282+ let path = dir.path().join(name);
283+ println!("creating file at {:?}", path);
284+ fs::write(&path, b"").unwrap();
285+ paths.push(path);
286+ }
287+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
288+ let expected = format!(
289+ "\
290+{0} abcdef
291+{0} abc�def",
292+ empty_hash,
293+ );
294+ println!("output");
295+ println!("======");
296+ println!("{}", output);
297+ println!();
298+ println!("expected");
299+ println!("========");
300+ println!("{}", expected);
301+ println!();
302+ assert_eq!(expected, output);
303+}
304+
305+#[test]
306+fn test_check() {
307+ // Make a directory full of files, and make sure the b3sum output in that
308+ // directory is what we expect.
309+ let a_hash = blake3::hash(b"a").to_hex();
310+ let b_hash = blake3::hash(b"b").to_hex();
311+ let cd_hash = blake3::hash(b"cd").to_hex();
312+ let dir = tempfile::tempdir().unwrap();
313+ fs::write(dir.path().join("a"), b"a").unwrap();
314+ fs::write(dir.path().join("b"), b"b").unwrap();
315+ fs::create_dir(dir.path().join("c")).unwrap();
316+ fs::write(dir.path().join("c/d"), b"cd").unwrap();
317+ let output = cmd!(b3sum_exe(), "a", "b", "c/d")
318+ .dir(dir.path())
319+ .stdout_capture()
320+ .stderr_capture()
321+ .run()
322+ .unwrap();
323+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
324+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
325+ let expected_checkfile = format!(
326+ "{} a\n\
327+ {} b\n\
328+ {} c/d\n",
329+ a_hash, b_hash, cd_hash,
330+ );
331+ assert_eq!(expected_checkfile, stdout);
332+ assert_eq!("", stderr);
333+
334+ // Now use the output we just validated as a checkfile, passed to stdin.
335+ let output = cmd!(b3sum_exe(), "--check")
336+ .stdin_bytes(expected_checkfile.as_bytes())
337+ .dir(dir.path())
338+ .stdout_capture()
339+ .stderr_capture()
340+ .run()
341+ .unwrap();
342+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
343+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
344+ let expected_check_output = "\
345+ a: OK\n\
346+ b: OK\n\
347+ c/d: OK\n";
348+ assert_eq!(expected_check_output, stdout);
349+ assert_eq!("", stderr);
350+
351+ // Now pass the same checkfile twice on the command line just for fun.
352+ let checkfile_path = dir.path().join("checkfile");
353+ fs::write(&checkfile_path, &expected_checkfile).unwrap();
354+ let output = cmd!(b3sum_exe(), "--check", &checkfile_path, &checkfile_path)
355+ .dir(dir.path())
356+ .stdout_capture()
357+ .stderr_capture()
358+ .run()
359+ .unwrap();
360+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
361+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
362+ let mut double_check_output = String::new();
363+ double_check_output.push_str(&expected_check_output);
364+ double_check_output.push_str(&expected_check_output);
365+ assert_eq!(double_check_output, stdout);
366+ assert_eq!("", stderr);
367+
368+ // Corrupt one of the files and check again.
369+ fs::write(dir.path().join("b"), b"CORRUPTION").unwrap();
370+ let output = cmd!(b3sum_exe(), "--check", &checkfile_path)
371+ .dir(dir.path())
372+ .stdout_capture()
373+ .stderr_capture()
374+ .unchecked()
375+ .run()
376+ .unwrap();
377+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
378+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
379+ let expected_check_failure = "\
380+ a: OK\n\
381+ b: FAILED\n\
382+ c/d: OK\n";
383+ assert!(!output.status.success());
384+ assert_eq!(expected_check_failure, stdout);
385+ assert_eq!("", stderr);
386+
387+ // Delete one of the files and check again.
388+ fs::remove_file(dir.path().join("b")).unwrap();
389+ let open_file_error = fs::File::open(dir.path().join("b")).unwrap_err();
390+ let output = cmd!(b3sum_exe(), "--check", &checkfile_path)
391+ .dir(dir.path())
392+ .stdout_capture()
393+ .stderr_capture()
394+ .unchecked()
395+ .run()
396+ .unwrap();
397+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
398+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
399+ let expected_check_failure = format!(
400+ "a: OK\n\
401+ b: FAILED ({})\n\
402+ c/d: OK\n",
403+ open_file_error,
404+ );
405+ assert!(!output.status.success());
406+ assert_eq!(expected_check_failure, stdout);
407+ assert_eq!("", stderr);
408+
409+ // Confirm that --quiet suppresses the OKs but not the FAILEDs.
410+ let output = cmd!(b3sum_exe(), "--check", "--quiet", &checkfile_path)
411+ .dir(dir.path())
412+ .stdout_capture()
413+ .stderr_capture()
414+ .unchecked()
415+ .run()
416+ .unwrap();
417+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
418+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
419+ let expected_check_failure = format!("b: FAILED ({})\n", open_file_error);
420+ assert!(!output.status.success());
421+ assert_eq!(expected_check_failure, stdout);
422+ assert_eq!("", stderr);
423+}
424+
425+#[test]
426+fn test_check_invalid_characters() {
427+ // Check that a null character in the path fails.
428+ let output = cmd!(b3sum_exe(), "--check")
429+ .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \0")
430+ .stdout_capture()
431+ .stderr_capture()
432+ .unchecked()
433+ .run()
434+ .unwrap();
435+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
436+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
437+ assert!(!output.status.success());
438+ assert_eq!("", stdout);
439+ assert_eq!("b3sum: Null character in path\n", stderr);
440+
441+ // Check that a Unicode replacement character in the path fails.
442+ let output = cmd!(b3sum_exe(), "--check")
443+ .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 �")
444+ .stdout_capture()
445+ .stderr_capture()
446+ .unchecked()
447+ .run()
448+ .unwrap();
449+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
450+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
451+ assert!(!output.status.success());
452+ assert_eq!("", stdout);
453+ assert_eq!("b3sum: Unicode replacement character in path\n", stderr);
454+
455+ // Check that an invalid escape sequence in the path fails.
456+ let output = cmd!(b3sum_exe(), "--check")
457+ .stdin_bytes("\\0000000000000000000000000000000000000000000000000000000000000000 \\a")
458+ .stdout_capture()
459+ .stderr_capture()
460+ .unchecked()
461+ .run()
462+ .unwrap();
463+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
464+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
465+ assert!(!output.status.success());
466+ assert_eq!("", stdout);
467+ assert_eq!("b3sum: Invalid backslash escape\n", stderr);
468+
469+ // Windows also forbids literal backslashes. Check for that if and only if
470+ // we're on Windows.
471+ if cfg!(windows) {
472+ let output = cmd!(b3sum_exe(), "--check")
473+ .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \\")
474+ .stdout_capture()
475+ .stderr_capture()
476+ .unchecked()
477+ .run()
478+ .unwrap();
479+ let stdout = std::str::from_utf8(&output.stdout).unwrap();
480+ let stderr = std::str::from_utf8(&output.stderr).unwrap();
481+ assert!(!output.status.success());
482+ assert_eq!("", stdout);
483+ assert_eq!("b3sum: Backslash in path\n", stderr);
484+ }
485+}
486+
487+#[test]
488+fn test_globbing() {
489+ // On Unix, globbing is provided by the shell. On Windows, globbing is
490+ // provided by us, using the `wild` crate.
491+ let dir = tempfile::tempdir().unwrap();
492+ let file1 = dir.path().join("file1");
493+ fs::write(&file1, b"foo").unwrap();
494+ let file2 = dir.path().join("file2");
495+ fs::write(&file2, b"bar").unwrap();
496+
497+ let foo_hash = blake3::hash(b"foo");
498+ let bar_hash = blake3::hash(b"bar");
499+ let expected = format!("{} file1\n{} file2", foo_hash.to_hex(), bar_hash.to_hex());
500+
501+ let star_command = format!("{} *", b3sum_exe().to_str().unwrap());
502+ let (exe, c_flag) = if cfg!(windows) {
503+ ("cmd.exe", "/C")
504+ } else {
505+ ("/bin/sh", "-c")
506+ };
507+ let output = cmd!(exe, c_flag, star_command)
508+ .dir(dir.path())
509+ .read()
510+ .unwrap();
511+ assert_eq!(expected, output);
512+}
--- a/b3sum/tests/test.rs
+++ /dev/null
@@ -1,136 +0,0 @@
1-use duct::cmd;
2-use std::fs;
3-use std::io::prelude::*;
4-use std::path::PathBuf;
5-
6-pub fn b3sum_exe() -> PathBuf {
7- assert_cmd::cargo::cargo_bin("b3sum")
8-}
9-
10-#[test]
11-fn test_hash_one() {
12- let expected = blake3::hash(b"foo").to_hex();
13- let output = cmd!(b3sum_exe()).stdin_bytes("foo").read().unwrap();
14- assert_eq!(&*expected, output);
15-}
16-
17-#[test]
18-fn test_hash_one_raw() {
19- let expected = blake3::hash(b"foo").as_bytes().to_owned();
20- let output = cmd!(b3sum_exe(), "--raw")
21- .stdin_bytes("foo")
22- .stdout_capture()
23- .run()
24- .unwrap()
25- .stdout;
26- assert_eq!(expected, output.as_slice());
27-}
28-
29-#[test]
30-fn test_hash_many() {
31- let dir = tempfile::tempdir().unwrap();
32- let file1 = dir.path().join("file1");
33- fs::write(&file1, b"foo").unwrap();
34- let file2 = dir.path().join("file2");
35- fs::write(&file2, b"bar").unwrap();
36-
37- let output = cmd!(b3sum_exe(), &file1, &file2).read().unwrap();
38- let foo_hash = blake3::hash(b"foo");
39- let bar_hash = blake3::hash(b"bar");
40- let expected = format!(
41- "{} {}\n{} {}",
42- foo_hash.to_hex(),
43- file1.to_string_lossy(),
44- bar_hash.to_hex(),
45- file2.to_string_lossy(),
46- );
47- assert_eq!(expected, output);
48-
49- let output_no_names = cmd!(b3sum_exe(), "--no-names", &file1, &file2)
50- .read()
51- .unwrap();
52- let expected_no_names = format!("{}\n{}", foo_hash.to_hex(), bar_hash.to_hex(),);
53- assert_eq!(expected_no_names, output_no_names);
54-}
55-
56-#[test]
57-fn test_hash_length() {
58- let mut buf = [0; 100];
59- blake3::Hasher::new()
60- .update(b"foo")
61- .finalize_xof()
62- .fill(&mut buf);
63- let expected = hex::encode(&buf[..]);
64- let output = cmd!(b3sum_exe(), "--length=100")
65- .stdin_bytes("foo")
66- .read()
67- .unwrap();
68- assert_eq!(&*expected, &*output);
69-}
70-
71-#[test]
72-fn test_keyed() {
73- let key = [42; blake3::KEY_LEN];
74- let f = tempfile::NamedTempFile::new().unwrap();
75- f.as_file().write_all(b"foo").unwrap();
76- f.as_file().flush().unwrap();
77- let expected = blake3::keyed_hash(&key, b"foo").to_hex();
78- let output = cmd!(b3sum_exe(), "--keyed", "--no-names", f.path())
79- .stdin_bytes(&key[..])
80- .read()
81- .unwrap();
82- assert_eq!(&*expected, &*output);
83-}
84-
85-#[test]
86-fn test_derive_key() {
87- let context = "BLAKE3 2019-12-28 10:28:41 example context";
88- let f = tempfile::NamedTempFile::new().unwrap();
89- f.as_file().write_all(b"key material").unwrap();
90- f.as_file().flush().unwrap();
91- let mut derive_key_out = [0; blake3::OUT_LEN];
92- blake3::derive_key(context, b"key material", &mut derive_key_out);
93- let expected = hex::encode(&derive_key_out);
94- let output = cmd!(b3sum_exe(), "--derive-key", context, "--no-names", f.path())
95- .read()
96- .unwrap();
97- assert_eq!(&*expected, &*output);
98-}
99-
100-#[test]
101-fn test_no_mmap() {
102- let f = tempfile::NamedTempFile::new().unwrap();
103- f.as_file().write_all(b"foo").unwrap();
104- f.as_file().flush().unwrap();
105-
106- let expected = blake3::hash(b"foo").to_hex();
107- let output = cmd!(b3sum_exe(), "--no-mmap", "--no-names", f.path())
108- .read()
109- .unwrap();
110- assert_eq!(&*expected, &*output);
111-}
112-
113-#[test]
114-fn test_length_without_value_is_an_error() {
115- let result = cmd!(b3sum_exe(), "--length")
116- .stdin_bytes("foo")
117- .stderr_capture()
118- .run();
119- assert!(result.is_err());
120-}
121-
122-#[test]
123-fn test_raw_with_multi_files_is_an_error() {
124- let f1 = tempfile::NamedTempFile::new().unwrap();
125- let f2 = tempfile::NamedTempFile::new().unwrap();
126-
127- // Make sure it doesn't error with just one file
128- let result = cmd!(b3sum_exe(), "--raw", f1.path()).stdout_capture().run();
129- assert!(result.is_ok());
130-
131- // Make sure it errors when both file are passed
132- let result = cmd!(b3sum_exe(), "--raw", f1.path(), f2.path())
133- .stderr_capture()
134- .run();
135- assert!(result.is_err());
136-}
--- /dev/null
+++ b/b3sum/what_does_check_do.md
@@ -0,0 +1,174 @@
1+# How does `b3sum --check` behave exactly?<br>or: Are filepaths...text?
2+
3+Most of the time, `b3sum --check` is a drop-in replacement for `md5sum --check`
4+and other Coreutils hashing tools. It consumes a checkfile (the output of a
5+regular `b3sum` command), re-hashes all the files listed there, and returns
6+success if all of those hashes are still correct. What makes this more
7+complicated than it might seem, is that representing filepaths as text means we
8+need to consider many possible edge cases of unrepresentable filepaths. This
9+document describes all of these edge cases in detail.
10+
11+## The simple case
12+
13+Here's the result of running `b3sum a b c/d` in a directory that contains
14+those three files:
15+
16+```bash
17+$ echo hi > a
18+$ echo lo > b
19+$ mkdir c
20+$ echo stuff > c/d
21+$ b3sum a b c/d
22+0b8b60248fad7ac6dfac221b7e01a8b91c772421a15b387dd1fb2d6a94aee438 a
23+6ae4a57bbba24f79c461d30bcb4db973b9427d9207877e34d2d74528daa84115 b
24+2d477356c962e54784f1c5dc5297718d92087006f6ee96b08aeaf7f3cd252377 c/d
25+```
26+
27+If we pipe that output into `b3sum --check`, it will exit with status zero
28+(success) and print:
29+
30+```bash
31+$ b3sum a b c/d | b3sum --check
32+a: OK
33+b: OK
34+c/d: OK
35+```
36+
37+If we delete `b` and change the contents of `c/d`, and then use the same
38+checkfile as above, `b3sum --check` will exit with a non-zero status (failure)
39+and print:
40+
41+```bash
42+$ b3sum a b c/d > checkfile
43+$ rm b
44+$ echo more stuff >> c/d
45+$ b3sum --check checkfile
46+a: OK
47+b: FAILED (No such file or directory (os error 2))
48+c/d: FAILED
49+```
50+
51+In these typical cases, `b3sum` and `md5sum` have identical output for success
52+and very similar output for failure.
53+
54+## Escaping newlines and backslashes
55+
56+Since the checkfile format (the regular output format of `b3sum`) is
57+newline-separated text, we need to worry about what happens when a filepath
58+contains a newline, or worse. Suppose we create a file named `x[newline]x`
59+(3 characters). One way to create such a file is with a Python one-liner like
60+this:
61+
62+```python
63+>>> open("x\nx", "w")
64+```
65+
66+Here's what happens when we hash that file with `b3sum`:
67+
68+```bash
69+$ b3sum x*
70+\af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 x\nx
71+```
72+
73+Notice two things. First, `b3sum` puts a single `\` character at the front of
74+the line. This indicates that the filepath contains escape sequences that
75+`b3sum --check` will need to unescape. Then, `b3sum` replaces the newline
76+character in the filepath with the two-character escape sequence `\n`.
77+Similarly, if the filepath contained a backslash, `b3sum` would escape it as
78+`\\` in the output. So far, all of this behavior is still identical to
79+`md5sum`.
80+
81+## Invalid Unicode
82+
83+This is where `b3sum` and `md5sum` diverge. Apart from the newline and backslash
84+escapes described above, `md5sum` copies all other filepath bytes verbatim to
85+its output. That means its output encoding is "ASCII plus whatever bytes we got
86+from the command line". This creates two problems:
87+
88+1. Printing something that isn't UTF-8 is kind of gross.
89+2. Windows support.
90+
91+What's the deal with Windows? To start with, there's a fundamental difference
92+in how Unix and Windows represent filepaths. Unix filepaths are "usually UTF-8"
93+and Windows filepaths are "usually UTF-16". That means that a file named `abc`
94+is typically represented as the bytes `[97, 98, 99]` on Unix and as the bytes
95+`[97, 0, 98, 0, 99, 0]` on Windows. The `md5sum` approach won't work if we plan
96+on creating a checkfile on Unix and checking it on Windows, or vice versa.
97+
98+A more portable approach is to convert platform-specific bytes into some
99+consistent Unicode encoding. (In practice this is going to be UTF-8, but in
100+theory it could be anything.) Then when `--check` needs to open a file, we
101+convert the Unicode representation back into platform-specific bytes. This
102+makes important common cases like `abc`, and in fact even `abc[newline]def`,
103+work as expected. Great!
104+
105+But...what did we mean above when we said *usually* UTF-8 and *usually* UTF-16?
106+It turns out that not every possible sequence of bytes is valid UTF-8, and not
107+every possible sequence of 16-bit wide chars is valid UTF-16. For example, the
108+byte 0xFF (255) can never appear in any UTF-8 string. If we ask Python to
109+decode it, it yells at us:
110+
111+```python
112+>>> b"\xFF".decode("UTF-8")
113+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
114+```
115+
116+However, tragically, we *can* create a file with that byte in its name (on
117+Linux at least, though not usually on macOS):
118+
119+```python
120+>>> open(b"y\xFFy", "w")
121+```
122+
123+So some filepaths aren't representable in Unicode at all. Our plan to "convert
124+platform-specific bytes into some consistent Unicode encoding" isn't going to
125+work for everything. What does `b3sum` do with the file above?
126+
127+```bash
128+$ b3sum y*
129+af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 y�y
130+```
131+
132+That � in there is a "Unicode replacement character". When we run into
133+filepaths that we can't represent in Unicode, we replace the unrepresentable
134+parts with these characters. On the checking side, to avoid any possible
135+confusion between two different invalid filepaths, we automatically fail if we
136+see a replacement character. Together with a few more details covered in the
137+next section, this gives us an important set of properties:
138+
139+1. Any file can be hashed locally.
140+2. Any file with a valid Unicode name not containing the � character can be
141+ checked.
142+3. Checking ambiguous or unrepresentable filepaths always fails.
143+4. Checkfiles are always valid UTF-8.
144+5. Checkfiles are portable between Unix and Windows.
145+
146+## Formal Rules
147+
148+1. When hashing, filepaths are represented in a platform-specific encoding,
149+ which can accommodate any filepath on the current platform. In Rust, this is
150+ `OsStr`/`OsString`.
151+2. In output, filepaths are first converted to UTF-8. Any non-Unicode segments
152+ are replaced with Unicode replacement characters (U+FFFD). In Rust, this is
153+ `OsStr::to_string_lossy`.
154+3. Then, if a filepath contains any backslashes (U+005C) or newlines (U+000A),
155+ these characters are escaped as `\\` and `\n` respectively.
156+4. Finally, any output line containing an escape sequence is prefixed with a
157+ single backslash.
158+5. When checking, each line is parsed as UTF-8, separated by a newline
159+ (U+000A). Invalid UTF-8 is an error.
160+6. Then, if a line begins with a backslash, the filepath component is
161+ unescaped. Any escape sequence other than `\\` or `\n` is an error. If a
162+ line does not begin with a backslash, unescaping is not performed, and any
163+ backslashes in the filepath component are interpreted literally. (`b3sum`
164+ output never contains unescaped backslashes, but they can occur in
165+ checkfiles assembled by hand.)
166+7. Finally, if a filepath contains a Unicode replacement character (U+FFFD) or
167+ a null character (U+0000), it is an error.
168+
169+ **Additionally, on Windows only:**
170+
171+8. In output, all backslashes (U+005C) are replaced with forward slashes
172+ (U+002F).
173+9. When checking, after unescaping, if a filepath contains a backslash, it is
174+ an error.
--- a/build.rs
+++ b/build.rs
@@ -74,7 +74,15 @@ fn new_build() -> cc::Build {
7474 build
7575 }
7676
77-fn c_compiler_exists_and_supports_avx512() -> bool {
77+#[derive(PartialEq)]
78+enum CCompilerSupport {
79+ NoCompiler,
80+ NoAVX512,
81+ YesAVX512,
82+}
83+use CCompilerSupport::*;
84+
85+fn c_compiler_support() -> CCompilerSupport {
7886 let build = new_build();
7987 let flags_checked;
8088 let support_result: Result<bool, _> = if is_windows_msvc() {
@@ -90,14 +98,14 @@ fn c_compiler_exists_and_supports_avx512() -> bool {
9098 }
9199 };
92100 match support_result {
93- Ok(true) => true,
101+ Ok(true) => YesAVX512,
94102 Ok(false) => {
95103 warn(&format!(
96104 "The C compiler {:?} does not support {}.",
97105 build.get_compiler().path(),
98106 flags_checked,
99107 ));
100- false
108+ NoAVX512
101109 }
102110 Err(e) => {
103111 println!("{:?}", e);
@@ -105,26 +113,44 @@ fn c_compiler_exists_and_supports_avx512() -> bool {
105113 "No C compiler {:?} detected.",
106114 build.get_compiler().path()
107115 ));
108- false
116+ NoCompiler
109117 }
110118 }
111119 }
112120
113-fn build_x86_pure() {
114- // A pure Rust build, so nothing to compile here. Enable the Rust SSE4.1
115- // and AVX2 intrinsics builds. Do not enable AVX-512 in general.
121+fn build_sse41_avx2_rust_intrinsics() {
122+ // No C code to compile here. Set the cfg flags that enable the Rust SSE4.1
123+ // and AVX2 intrinsics modules. The regular Cargo build will compile them.
116124 println!("cargo:rustc-cfg=blake3_sse41_rust");
117125 println!("cargo:rustc-cfg=blake3_avx2_rust");
118126 }
119127
120-fn build_x86_intrinsics() {
121- // Enable the Rust SSE4.1 and AVX2 intrinsics builds, and also the C
122- // AVX-512 intrinsics build. This is required on 32-bit x86 targets, since
123- // the assembly implementations don't support those.
124- assert!(c_compiler_exists_and_supports_avx512());
125- assert!(!is_pure());
126- println!("cargo:rustc-cfg=blake3_sse41_rust");
127- println!("cargo:rustc-cfg=blake3_avx2_rust");
128+fn build_sse41_avx2_assembly() {
129+ // Build the assembly implementations for SSE4.1 and AVX2. This is
130+ // preferred, but it only supports x86_64.
131+ assert!(is_x86_64());
132+ println!("cargo:rustc-cfg=blake3_sse41_ffi");
133+ println!("cargo:rustc-cfg=blake3_avx2_ffi");
134+ let mut build = new_build();
135+ if is_windows_msvc() {
136+ build.file("c/blake3_sse41_x86-64_windows_msvc.asm");
137+ build.file("c/blake3_avx2_x86-64_windows_msvc.asm");
138+ } else if is_windows_gnu() {
139+ build.file("c/blake3_sse41_x86-64_windows_gnu.S");
140+ build.file("c/blake3_avx2_x86-64_windows_gnu.S");
141+ } else {
142+ // All non-Windows implementations are assumed to support
143+ // Linux-style assembly. These files do contain a small
144+ // explicit workaround for macOS also.
145+ build.file("c/blake3_sse41_x86-64_unix.S");
146+ build.file("c/blake3_avx2_x86-64_unix.S");
147+ }
148+ build.compile("blake3_sse41_avx2_assembly");
149+}
150+
151+fn build_avx512_c_intrinsics() {
152+ // This is required on 32-bit x86 targets, since the assembly
153+ // implementation doesn't support those.
128154 println!("cargo:rustc-cfg=blake3_avx512_ffi");
129155 let mut build = new_build();
130156 build.file("c/blake3_avx512.c");
@@ -138,37 +164,47 @@ fn build_x86_intrinsics() {
138164 // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782.
139165 build.flag("-fno-asynchronous-unwind-tables");
140166 }
141- build.compile("blake3_ffi");
167+ build.compile("blake3_avx512_intrinsics");
142168 }
143169
144-fn build_x86_asm() {
145- // Enable the assembly builds for SSE4.1, AVX2, and AVX-512. This is the
146- // preferred build configuration by default, but it only supports x86_64.
170+fn build_avx512_assembly() {
171+ // Build the assembly implementation for AVX-512. This is preferred, but it
172+ // only supports x86_64.
147173 assert!(is_x86_64());
148- assert!(c_compiler_exists_and_supports_avx512());
149- assert!(!is_pure());
150- assert!(!should_prefer_intrinsics());
151- println!("cargo:rustc-cfg=blake3_sse41_ffi");
152- println!("cargo:rustc-cfg=blake3_avx2_ffi");
153174 println!("cargo:rustc-cfg=blake3_avx512_ffi");
154175 let mut build = new_build();
155176 if is_windows_msvc() {
156- build.file("c/blake3_sse41_x86-64_windows_msvc.asm");
157- build.file("c/blake3_avx2_x86-64_windows_msvc.asm");
158177 build.file("c/blake3_avx512_x86-64_windows_msvc.asm");
159- } else if is_windows_gnu() {
160- build.file("c/blake3_sse41_x86-64_windows_gnu.S");
161- build.file("c/blake3_avx2_x86-64_windows_gnu.S");
162- build.file("c/blake3_avx512_x86-64_windows_gnu.S");
163178 } else {
164- // All non-Windows implementations are assumed to support
165- // Linux-style assembly. These files do contain a small
166- // explicit workaround for macOS also.
167- build.file("c/blake3_sse41_x86-64_unix.S");
168- build.file("c/blake3_avx2_x86-64_unix.S");
169- build.file("c/blake3_avx512_x86-64_unix.S");
179+ if is_windows_gnu() {
180+ build.file("c/blake3_avx512_x86-64_windows_gnu.S");
181+ } else {
182+ // All non-Windows implementations are assumed to support Linux-style
183+ // assembly. These files do contain a small explicit workaround for
184+ // macOS also.
185+ build.file("c/blake3_avx512_x86-64_unix.S");
186+ }
187+ // Older versions of Clang require these flags, even for assembly. See
188+ // https://github.com/BLAKE3-team/BLAKE3/issues/79.
189+ build.flag("-mavx512f");
190+ build.flag("-mavx512vl");
170191 }
171- build.compile("blake3_ffi");
192+ build.compile("blake3_avx512_assembly");
193+}
194+
195+fn build_neon_c_intrinsics() {
196+ let mut build = new_build();
197+ // Note that blake3_neon.c normally depends on the blake3_portable.c
198+ // for the single-instance compression function, but we expose
199+ // portable.rs over FFI instead. See ffi_neon.rs.
200+ build.file("c/blake3_neon.c");
201+ // ARMv7 platforms that support NEON generally need the following
202+ // flags. AArch64 supports NEON by default and does not support -mfpu.
203+ if is_armv7() {
204+ build.flag("-mfpu=neon-vfpv4");
205+ build.flag("-mfloat-abi=hard");
206+ }
207+ build.compile("blake3_neon");
172208 }
173209
174210 fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -177,31 +213,26 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
177213 }
178214
179215 if is_x86_64() || is_x86_32() {
180- if is_pure() {
181- build_x86_pure();
182- } else if !c_compiler_exists_and_supports_avx512() {
183- warn("Falling back to the pure Rust implementation, with reduced performance.");
184- build_x86_pure();
216+ let support = c_compiler_support();
217+ if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
218+ build_sse41_avx2_rust_intrinsics();
219+ } else {
220+ // We assume that all C compilers can assemble SSE4.1 and AVX2. We
221+ // don't explicitly check for support.
222+ build_sse41_avx2_assembly();
223+ }
224+
225+ if is_pure() || support == NoCompiler || support == NoAVX512 {
226+ // The binary will not include any AVX-512 code.
185227 } else if is_x86_32() || should_prefer_intrinsics() {
186- build_x86_intrinsics();
228+ build_avx512_c_intrinsics();
187229 } else {
188- build_x86_asm();
230+ build_avx512_assembly();
189231 }
190232 }
191233
192234 if is_neon() {
193- let mut build = new_build();
194- // Note that blake3_neon.c normally depends on the blake3_portable.c
195- // for the single-instance compression function, but we expose
196- // portable.rs over FFI instead. See ffi_neon.rs.
197- build.file("c/blake3_neon.c");
198- // ARMv7 platforms that support NEON generally need the following
199- // flags. AArch64 supports NEON by default and does not support -mpfu.
200- if is_armv7() {
201- build.flag("-mfpu=neon-vfpv4");
202- build.flag("-mfloat-abi=hard");
203- }
204- build.compile("blake3_neon");
235+ build_neon_c_intrinsics();
205236 }
206237
207238 // The `cc` crate doesn't automatically emit rerun-if directives for the
--- a/c/blake3_avx512_x86-64_unix.S
+++ b/c/blake3_avx512_x86-64_unix.S
@@ -82,15 +82,15 @@ blake3_hash_many_avx512:
8282 mov r14, qword ptr [rdi+0x50]
8383 mov r15, qword ptr [rdi+0x58]
8484 vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
85- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
85+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
8686 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
87- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
87+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
8888 vpunpcklqdq zmm8, zmm16, zmm17
8989 vpunpckhqdq zmm9, zmm16, zmm17
9090 vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
91- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
91+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
9292 vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
93- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
93+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
9494 vpunpcklqdq zmm10, zmm18, zmm19
9595 vpunpckhqdq zmm11, zmm18, zmm19
9696 mov r8, qword ptr [rdi+0x20]
@@ -102,15 +102,15 @@ blake3_hash_many_avx512:
102102 mov r14, qword ptr [rdi+0x70]
103103 mov r15, qword ptr [rdi+0x78]
104104 vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
105- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
105+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
106106 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
107- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
107+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
108108 vpunpcklqdq zmm12, zmm16, zmm17
109109 vpunpckhqdq zmm13, zmm16, zmm17
110110 vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
111- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
111+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
112112 vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
113- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
113+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
114114 vpunpcklqdq zmm14, zmm18, zmm19
115115 vpunpckhqdq zmm15, zmm18, zmm19
116116 vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -144,15 +144,15 @@ blake3_hash_many_avx512:
144144 mov r14, qword ptr [rdi+0x50]
145145 mov r15, qword ptr [rdi+0x58]
146146 vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
147- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
147+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
148148 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
149- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
149+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
150150 vpunpcklqdq zmm8, zmm24, zmm25
151151 vpunpckhqdq zmm9, zmm24, zmm25
152152 vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
153- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
153+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
154154 vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
155- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
155+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
156156 vpunpcklqdq zmm10, zmm24, zmm25
157157 vpunpckhqdq zmm11, zmm24, zmm25
158158 prefetcht0 [r8+rdx+0x80]
@@ -172,15 +172,15 @@ blake3_hash_many_avx512:
172172 mov r14, qword ptr [rdi+0x70]
173173 mov r15, qword ptr [rdi+0x78]
174174 vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
175- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
175+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
176176 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
177- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
177+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
178178 vpunpcklqdq zmm12, zmm24, zmm25
179179 vpunpckhqdq zmm13, zmm24, zmm25
180180 vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
181- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
181+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
182182 vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
183- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
183+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
184184 vpunpcklqdq zmm14, zmm24, zmm25
185185 vpunpckhqdq zmm15, zmm24, zmm25
186186 prefetcht0 [r8+rdx+0x80]
@@ -2039,7 +2039,7 @@ blake3_hash_many_avx512:
20392039 vpermq ymm14, ymm14, 0xDC
20402040 vpermq ymm15, ymm15, 0xDC
20412041 vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2042- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2042+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
20432043 mov eax, 17476
20442044 kmovw k2, eax
20452045 vpblendmd zmm13 {k2}, zmm13, zmm12
--- a/c/blake3_avx512_x86-64_windows_gnu.S
+++ b/c/blake3_avx512_x86-64_windows_gnu.S
@@ -96,15 +96,15 @@ blake3_hash_many_avx512:
9696 mov r14, qword ptr [rdi+0x50]
9797 mov r15, qword ptr [rdi+0x58]
9898 vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
99- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
99+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
100100 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
101- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
101+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
102102 vpunpcklqdq zmm8, zmm16, zmm17
103103 vpunpckhqdq zmm9, zmm16, zmm17
104104 vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
105- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
105+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
106106 vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
107- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
107+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
108108 vpunpcklqdq zmm10, zmm18, zmm19
109109 vpunpckhqdq zmm11, zmm18, zmm19
110110 mov r8, qword ptr [rdi+0x20]
@@ -116,15 +116,15 @@ blake3_hash_many_avx512:
116116 mov r14, qword ptr [rdi+0x70]
117117 mov r15, qword ptr [rdi+0x78]
118118 vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
119- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
119+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
120120 vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
121- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
121+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
122122 vpunpcklqdq zmm12, zmm16, zmm17
123123 vpunpckhqdq zmm13, zmm16, zmm17
124124 vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
125- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
125+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
126126 vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
127- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
127+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
128128 vpunpcklqdq zmm14, zmm18, zmm19
129129 vpunpckhqdq zmm15, zmm18, zmm19
130130 vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -158,15 +158,15 @@ blake3_hash_many_avx512:
158158 mov r14, qword ptr [rdi+0x50]
159159 mov r15, qword ptr [rdi+0x58]
160160 vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
161- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
161+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
162162 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
163- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
163+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
164164 vpunpcklqdq zmm8, zmm24, zmm25
165165 vpunpckhqdq zmm9, zmm24, zmm25
166166 vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
167- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
167+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
168168 vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
169- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
169+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
170170 vpunpcklqdq zmm10, zmm24, zmm25
171171 vpunpckhqdq zmm11, zmm24, zmm25
172172 prefetcht0 [r8+rdx+0x80]
@@ -186,15 +186,15 @@ blake3_hash_many_avx512:
186186 mov r14, qword ptr [rdi+0x70]
187187 mov r15, qword ptr [rdi+0x78]
188188 vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
189- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
189+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
190190 vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
191- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
191+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
192192 vpunpcklqdq zmm12, zmm24, zmm25
193193 vpunpckhqdq zmm13, zmm24, zmm25
194194 vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
195- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
195+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
196196 vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
197- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
197+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
198198 vpunpcklqdq zmm14, zmm24, zmm25
199199 vpunpckhqdq zmm15, zmm24, zmm25
200200 prefetcht0 [r8+rdx+0x80]
@@ -2065,7 +2065,7 @@ blake3_hash_many_avx512:
20652065 vpermq ymm14, ymm14, 0xDC
20662066 vpermq ymm15, ymm15, 0xDC
20672067 vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2068- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2068+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
20692069 mov eax, 17476
20702070 kmovw k2, eax
20712071 vpblendmd zmm13 {k2}, zmm13, zmm12
--- a/c/blake3_avx512_x86-64_windows_msvc.asm
+++ b/c/blake3_avx512_x86-64_windows_msvc.asm
@@ -99,15 +99,15 @@ innerloop16:
9999 mov r14, qword ptr [rdi+50H]
100100 mov r15, qword ptr [rdi+58H]
101101 vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
102- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
102+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
103103 vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
104- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
104+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
105105 vpunpcklqdq zmm8, zmm16, zmm17
106106 vpunpckhqdq zmm9, zmm16, zmm17
107107 vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
108- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
108+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
109109 vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
110- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
110+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
111111 vpunpcklqdq zmm10, zmm18, zmm19
112112 vpunpckhqdq zmm11, zmm18, zmm19
113113 mov r8, qword ptr [rdi+20H]
@@ -119,15 +119,15 @@ innerloop16:
119119 mov r14, qword ptr [rdi+70H]
120120 mov r15, qword ptr [rdi+78H]
121121 vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
122- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
122+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
123123 vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
124- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
124+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
125125 vpunpcklqdq zmm12, zmm16, zmm17
126126 vpunpckhqdq zmm13, zmm16, zmm17
127127 vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
128- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
128+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
129129 vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
130- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
130+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
131131 vpunpcklqdq zmm14, zmm18, zmm19
132132 vpunpckhqdq zmm15, zmm18, zmm19
133133 vmovdqa32 zmm27, zmmword ptr [INDEX0]
@@ -161,15 +161,15 @@ innerloop16:
161161 mov r14, qword ptr [rdi+50H]
162162 mov r15, qword ptr [rdi+58H]
163163 vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
164- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
164+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
165165 vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
166- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
166+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
167167 vpunpcklqdq zmm8, zmm24, zmm25
168168 vpunpckhqdq zmm9, zmm24, zmm25
169169 vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
170- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
170+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
171171 vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
172- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
172+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
173173 vpunpcklqdq zmm10, zmm24, zmm25
174174 vpunpckhqdq zmm11, zmm24, zmm25
175175 prefetcht0 byte ptr [r8+rdx+80H]
@@ -189,15 +189,15 @@ innerloop16:
189189 mov r14, qword ptr [rdi+70H]
190190 mov r15, qword ptr [rdi+78H]
191191 vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
192- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
192+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
193193 vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
194- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
194+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
195195 vpunpcklqdq zmm12, zmm24, zmm25
196196 vpunpckhqdq zmm13, zmm24, zmm25
197197 vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
198- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
198+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
199199 vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
200- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
200+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
201201 vpunpcklqdq zmm14, zmm24, zmm25
202202 vpunpckhqdq zmm15, zmm24, zmm25
203203 prefetcht0 byte ptr [r8+rdx+80H]
@@ -2073,7 +2073,7 @@ final7blocks:
20732073 vpermq ymm14, ymm14, 0DCH
20742074 vpermq ymm15, ymm15, 0DCH
20752075 vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
2076- vinserti32x8 zmm13, zmm14, ymm15, 01H
2076+ vinserti64x4 zmm13, zmm14, ymm15, 01H
20772077 mov eax, 17476
20782078 kmovw k2, eax
20792079 vpblendmd zmm13 {k2}, zmm13, zmm12
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -182,7 +182,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
182182 #if defined(IS_X86)
183183 const enum cpu_feature features = get_cpu_features();
184184 #if !defined(BLAKE3_NO_AVX512)
185- if (features & AVX512F) {
185+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
186186 blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
187187 increment_counter, flags, flags_start, flags_end,
188188 out);
@@ -223,7 +223,7 @@ size_t blake3_simd_degree(void) {
223223 #if defined(IS_X86)
224224 const enum cpu_feature features = get_cpu_features();
225225 #if !defined(BLAKE3_NO_AVX512)
226- if (features & AVX512F) {
226+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
227227 return 16;
228228 }
229229 #endif
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -276,7 +276,6 @@ impl Platform {
276276 }
277277
278278 #[cfg(feature = "neon")]
279- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
280279 pub fn neon() -> Option<Self> {
281280 // Assumed to be safe if the "neon" feature is on.
282281 Some(Self::NEON)
--- /dev/null
+++ b/tools/compiler_version/Cargo.toml
@@ -0,0 +1,7 @@
1+[package]
2+name = "compiler_version"
3+version = "0.0.0"
4+edition = "2018"
5+
6+[build-dependencies]
7+cc = "1.0.50"
--- /dev/null
+++ b/tools/compiler_version/build.rs
@@ -0,0 +1,6 @@
1+fn main() {
2+ let build = cc::Build::new();
3+ let compiler = build.get_compiler();
4+ let compiler_path = compiler.path().to_string_lossy();
5+ println!("cargo:rustc-env=COMPILER_PATH={}", compiler_path);
6+}
--- /dev/null
+++ b/tools/compiler_version/src/main.rs
@@ -0,0 +1,27 @@
1+use std::process::Command;
2+
3+fn main() {
4+ // Print the rustc version.
5+ Command::new(env!("CARGO"))
6+ .args(&["rustc", "--quiet", "--", "--version"])
7+ .status()
8+ .unwrap();
9+ println!();
10+
11+ // Print the Cargo version.
12+ Command::new(env!("CARGO"))
13+ .args(&["--version"])
14+ .status()
15+ .unwrap();
16+ println!();
17+
18+ // Print the C compiler version. This relies on C compiler detection done
19+ // in build.rs, which sets the COMPILER_PATH variable.
20+ let compiler_path = env!("COMPILER_PATH");
21+ let mut compiler_command = Command::new(compiler_path);
22+ // Use the --version flag on everything other than MSVC.
23+ if !cfg!(target_env = "msvc") {
24+ compiler_command.arg("--version");
25+ }
26+ let _ = compiler_command.status().unwrap();
27+}
--- /dev/null
+++ b/tools/instruction_set_support/Cargo.toml
@@ -0,0 +1,6 @@
1+[package]
2+name = "instruction_set_support"
3+version = "0.0.0"
4+edition = "2018"
5+
6+[dependencies]